Setup

# Load Required libraries:
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(URD)
## Loading required package: ggplot2
## Loading required package: Matrix
## Registered S3 method overwritten by 'gplots':
##   method         from 
##   reorder.factor gdata
# set working directory and make output folders
setwd("/data/CSD/2025-Juliano-Collab/hydra_neuron_URD_Rmarkdown")

dir.create("Figures/")
dir.create("Figures/QC")
dir.create("objects")


# Using the same colors that were used to plot the seurat umap:
curated.ident.colors <- c(ec4A = "#D3813C", ec4B = "#798E96", ec2 = "#8E486D", `ec2 precursor` = "#60BAE8",
    ec1B = "#4B8256", ec1E = "#E57BA2", ec1C = "#8F9BCD", ec1D = "#727E99", ec1A = "#69C3C4",
    `ec1 precursor` = "#B887BB", progen2 = "#7D5DA8", ISC = "#DF7CB0", progen1 = "#F3BA67",
    ec5 = "#968FC4", `en precursor` = "#B95650", en1 = "#578EC9", en3 = "#88B759",
    en2 = "#E8887C", ec3A = "#60B960", ec3B = "#5C4081", ec3C = "#62C5D9", `ec3 precursor` = "#CC81B4")

# set seed:
set.seed(1212)

Load data

Here we are loading the seurat object that contains the gene expression, batch corrected expression, meta data, and umap coordinants. We are also loading the initial URD object that did not include ec1E, ec1C, or ec1D, but contains the NMF module information.

seurat.obj <- readRDS("/data/CSD/Michael/hydra/objects/hydra.neuron.seurat_updated.HML.RDS")
old.urd.obj <- readRDS("/data/CSD/Michael/hydra/objects/hydra.neuron.urd_updated.idents.RDS")

Subset data

ec1B is a very large cluster with few differences between subclusters. We are therefore subsetting it to reduce the number of cells. However, unlike the initial URD run, we will be including ec1E, ec1C, and ec1D in this object.

# plot the cells by ident and sub-clusters
DimPlot(seurat.obj, group.by = "curatedIdent", label = T, repel = T) + NoLegend() +
    NoAxes() | DimPlot(seurat.obj, group.by = "seurat_clusters", label = T, repel = T) +
    NoLegend() + NoAxes()

# Removing seurat clusters 1,9,16 to only keep part of the ec1B sub-cluster
cells.exclude <- rownames(seurat.obj@meta.data[seurat.obj@meta.data$seurat_clusters %in%
    c("1", "9", "16"), ])
cells.keep <- setdiff(rownames(seurat.obj@meta.data), cells.exclude)

# Subset the seurat object:
seurat.obj <- subset(seurat.obj, cells = cells.keep)

Create URD object

URD currently does not include a function to convert the new seurat object layouts to an URD object. We are currently working on URD V2 that will include this function, but in the meantime we make a new function to convert the subsetted seurat object to an URD object. To deal with batch effects, we will create the trajectory using seurat’s batch corrected “integrated” data. After creating the trajectory, we will change the gene expression slot of the URD object back to the SCT normalized data. We are also going to remove Dropseq data that has very large batch effects.

# Need to use a new function to create URD object from the new Seurat object
# format.
SeuratToURDv3 <- function(seurat.object) {
    if (requireNamespace("Seurat", quietly = TRUE)) {
        # Create an empty URD object
        ds <- new("URD")

        # Copy over data
        ds@logupx.data <- as(as.matrix(seurat.object@assays$RNA@data), "dgCMatrix")
        if (!any(dim(seurat.object@assays$RNA@counts) == 0))
            ds@count.data <- as(as.matrix(seurat.object@assays$RNA@counts[rownames(seurat.object@assays$RNA@data),
                colnames(seurat.object@assays$RNA@data)]), "dgCMatrix")

        # Copy over metadata TO DO - grab kmeans clustering info
        get.data <- NULL
        if (.hasSlot(seurat.object, "data.info")) {
            get.data <- as.data.frame(seurat.object@assays$RNA@data.info)
        } else if (.hasSlot(seurat.object, "meta.data")) {
            get.data <- as.data.frame(seurat.object@meta.data)
        }
        if (!is.null(get.data)) {
            di <- colnames(get.data)
            m <- grep("res|cluster|Res|Cluster", di, value = T, invert = T)  # Put as metadata if it's not the result of a clustering.
            discrete <- apply(get.data, 2, function(x) length(unique(x))/length(x))
            gi <- di[which(discrete <= 0.015)]
            ds@meta <- get.data[, m, drop = F]
            ds@group.ids <- get.data[, gi, drop = F]
        }

        # Copy over var.genes
        if (length(seurat.object@assays$RNA@var.features > 0))
            ds@var.genes <- seurat.object@assays$RNA@var.features

        # Move over tSNE projection
        if (.hasSlot(seurat.object, "tsne.rot")) {
            if (!any(dim(seurat.object@tsne.rot) == 0)) {
                ds@tsne.y <- as.data.frame(seurat.object@tsne.rot)
                colnames(ds@tsne.y) <- c("tSNE1", "tSNE2")
            }
        } else if (.hasSlot(seurat.object, "reductions")) {
            if (("umap" %in% names(seurat.object@reductions)) && !any(dim(seurat.object@reductions$umap) ==
                0)) {
                ds@tsne.y <- as.data.frame(seurat.object@reductions$umap@cell.embeddings)
                colnames(ds@tsne.y) <- c("tSNE1", "tSNE2")
            }
        }

        # Move over PCA results
        if (.hasSlot(seurat.object, "pca.x")) {
            if (!any(dim(seurat.object@pca.x) == 0)) {
                ds@pca.load <- seurat.object@pca.x
                ds@pca.scores <- seurat.object@pca.rot
                warning("Need to set which PCs are significant in @pca.sig")
            }
            ## TO DO: Convert SVD to sdev
        } else if (.hasSlot(seurat.object, "reductions")) {
            if (("pca" %in% names(seurat.object@reductions)) && !any(dim(Loadings(seurat.object,
                reduction = "pca")) == 0)) {
                ds@pca.load <- as.data.frame(Loadings(seurat.object, reduction = "pca"))
                ds@pca.scores <- as.data.frame(seurat.object@reductions$pca@cell.embeddings)
                ds@pca.sdev <- seurat.object@reductions$pca@stdev
                ds@pca.sig <- pcaMarchenkoPastur(M = dim(ds@pca.scores)[1], N = dim(ds@pca.load)[1],
                  pca.sdev = ds@pca.sdev)
            }
        }
        return(ds)
    } else {
        stop("Package Seurat is required for this function. To install: install.packages('Seurat')\n")
    }
}

urd.obj <- SeuratToURDv3(seurat.obj)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 4.9 GiB
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 4.9 GiB
## [1] "Marchenko-Pastur eigenvalue null upper bound: 17.2863780567754"
## [1] "16 PCs have larger eigenvalues."
# To deal with batch effects, we will use the batch corrected 'integrated' data
# from Seurat to create the URD trajectory. After creating the trajectory, we
# will use the SCT normalized gene expression for all plots
urd.obj@logupx.data <- seurat.obj@assays$integrated$data

# Removing Dropseq cells that in initial analyses caused large batch effects.
urd.obj <- urdSubset(urd.obj, cells.keep = cellsInCluster(urd.obj, "batch", c("10x.bud1",
    "10x.bud2", "10x.nGreen", "10x.PN")))

# Remove the seurat obj to clear some RAM
rm(seurat.obj)

Removing outliers

Before creating a trajectory, we need to remove outliers and doublets. There also may be multiple transdifferentiating events to make ec1E cells, so we will also have to remove some cells from ec1E to represent the most likely route to ec1E cells.

Removing ouliers based on k-nearest neighbors

Outlier cells are poorly connected to the main data set and often disrupt trajectory reconstruction. Thus, we calculate k-nearest neighbor networks (KNN) and identify outlier cells based on their distance to their nearest neighbors. Here we remove 242 outliers based on KNN networks.

# First, we calculate a list of variable genes
var.genes <- findVariableGenes(object = urd.obj, set.object.var.genes = F, diffCV.cutoff = 0.45,
    do.plot = T)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 4.1 GiB
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 4.1 GiB
## Warning in xy.coords(x, y, xlabel, ylabel, log): 375 x values <= 0 omitted from
## logarithmic plot

urd.obj@var.genes <- var.genes

# Calculate k-nearest neighbor networks
urd.obj <- calcKNN(urd.obj, genes.use = rownames(urd.obj@logupx.data), nn = 100)

outliers.knn <- knnOutliers(urd.obj, x.max = 86, slope.r = 0.12, int.r = 79, slope.b = 1.1,
    int.b = 7.75)

# Because we will remove cells based on several criteria, we will make a list
# of cells to remove for different reasons that we can plot later:
cells.remove.list <- list(knn.outliers = outliers.knn)

# Visualize outlier cells
urd.obj <- groupFromCells(urd.obj, group.id = "knn.outliers", cells = outliers.knn)

plotDimHighlight(urd.obj, "knn.outliers", "True", plot.title = "Outliers from k-Nearest Neighbors",
    highlight.color = "#00BFC4")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the URD package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Removing doublets using NMF module co-expression

Doublets are formed at low frequencies when using a droplet-based scRNA-seq technique when multiple cells are encapsulated in a single droplet. Since these can create spurious connections that interfere with trajectory inference, we aimed to remove them by identifying cells with co-expression of typically mutually exclusive gene expression programs (Siebert et al., 2019). To do this, we identify the modules that are cell type specific and use functions in URD to identify thresholds for choosing cells to eliminate based on expression of cell-type specific modules that should not be co-expressed. We exclude ICSs and progenitors from this analysis as they are often co-expressing different modules as thei undergo differentiation.

# Load the broad NMF list from the old urd object.  We normalized NMF scores
# across all cells in the object by scaling such that each module’s expression
# ranged from 0-1 across all cells in the previous object.
urd.obj@nmf.c1 <- old.urd.obj@nmf.c1

# Make a plot list of each NMF module to visualize their expression and plot
# for each.
plot.list <- lapply(colnames(urd.obj@nmf.c1), function(x) plotDim(urd.obj, x, point.size = 0.5,
    alpha = 0.75))
names(plot.list) <- colnames(urd.obj@nmf.c1)
dir.create("Figures/NMF_umaps")
for (plot in names(plot.list)) {
    png(filename = paste0("Figures/NMF_umaps/", plot, ".png"), width = 300 * 10,
        height = 300 * 9, res = 300)
    print(plot.list[[plot]])
    dev.off()
}
# NMF Modules for for each cluster: en1: 20 en2: 21, en3: 1, ec1A: 27 ec1B: 4
# ec1C: 23 ec1D: 16 ec1E: 8 ec2: 14, ec3A: 19, ec3B: 13, ec3C: 13, ec4: 11, 22,
# ec5: 12

# NMF modules to use for doublet detection:
nmf.broad.subtype.use <- c("X20", "X21", "X1", "X27", "X4", "X23", "X16", "X8", "X14",
    "X19", "X13", "X11", "X22", "X12")

Next, we determine overlaps between module pairs. This is performing a pair-wise comparison between each NMF and identifying how many cells express each NMF on their own within a “module threshold”. Then, we are looking at the fraction of cells that overlap with both the high and low thresholds between each NMF. There are two thresholds used to determine whether a cell expresses a module: Distinct and Overlapping. Distinct is the number of co-expressing cells is similar with either threshold. Overlapping is the number of cells that co-express the modules increase dramatically as the threshold is lowered.

# considers NMF module expression within cells pairwise
nmf.doublet.combos <- NMFDoubletsDefineModules(urd.obj, modules.use = nmf.broad.subtype.use,
    module.thresh.high = 0.3, module.thresh.low = 0.15)

# Plot module overlaps
NMFDoubletsPlotModuleThresholds(nmf.doublet.combos, frac.overlap.max = 0.07, frac.overlap.diff.max = 0.15)

We identified 1416 doublets, 10 of which were also identified as knn outliers:

# Identify cells that express non-overlapping NMF modules, which we assume to
# be doublets
nmf.doublets <- NMFDoubletsDetermineCells(urd.obj, nmf.doublet.combos, module.expressed.thresh = 0.2,
    frac.overlap.max = 0.07, frac.overlap.diff.max = 0.15)

# add to the list of cells to remove:
cells.remove.list$nmf.doublets <- nmf.doublets

length(nmf.doublets)  #1416
## [1] 1416
intersect(nmf.doublets, outliers.knn)  #10 doublets are also outliers
##  [1] "CCTGCATTCTCCCAAC-1_2" "GACTTCCGTTCTCTCG-1_2" "ATCCACCGTACAACGG-1_4"
##  [4] "TCAGTCCCATCTGCGG-1_4" "CGTGCTTGTGTAACGG-1_4" "AGGTCATCACTTGGGC-1_4"
##  [7] "GGGTCACGTGTGTGGA-1_3" "TCAAGACGTATAGGAT-1_4" "GACTGATGTTTAGTCG-1_3"
## [10] "GGGTGAATCCTGTAGA-1_1"
# Visualize doublets
urd.obj <- groupFromCells(urd.obj, "doublets", nmf.doublets)

plotDim(urd.obj, "doublets", plot.title = "Cells that express two distinct NMF modules")

Crop outliers and doublets

# Crop the data based on nmf doublets and knn outliers:
cells.remove <- unique(c(outliers.knn, nmf.doublets))
cells.keep <- setdiff(colnames(urd.obj@logupx.data), cells.remove)

# Visualize The cells to remove:
urd.obj@group.ids[cells.keep, "cells.to.keep"] <- "keep"
urd.obj@group.ids[cells.remove, "cells.to.keep"] <- "remove"
plotDim(urd.obj, "cells.to.keep", plot.title = "Cells to remove (doublets & outliers)")

length(cells.remove)  #removing 1648 cells.
## [1] 1648
# Subset the data
urd.obj <- urdSubset(urd.obj, cells.keep = cells.keep)

Calculate Diffusion map and transition matrix

Next we calculate a diffusion map. These parameters were chosen by comparing results from several runs with varying parameters. When selecting the final diffusion map, we looked for (1) strong connections between differentiated neurons and progenitors, and (2) low promiscuity between different groups of terminally differentiated cells that we hypothesized shouldn’t be related (and for which we did not see cells with intermediate gene expression states).

dir.create("Figures/Transitions")

# Calculate a new set of variable genes after removing doublets and outlier
# cells
var.genes <- findVariableGenes(object = urd.obj, set.object.var.genes = F, diffCV.cutoff = 0.45,
    do.plot = T)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.8 GiB
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.8 GiB
## Warning in xy.coords(x, y, xlabel, ylabel, log): 399 x values <= 0 omitted from
## logarithmic plot

urd.obj@var.genes <- var.genes

# Calculate PCA:
urd.obj <- calcPCA(urd.obj, genes.use = rownames(urd.obj@logupx.data), mp.factor = 2)
## [1] "2025-09-02 10:59:20.919028: Centering and scaling data."
## [1] "2025-09-02 10:59:25.406105: Removing genes with no variation."
## [1] "2025-09-02 10:59:27.022041: Calculating PCA."
## [1] "2025-09-02 11:00:14.616209: Estimating significant PCs."
## [1] "Marchenko-Pastur eigenvalue null upper bound: 1.846342548734"
## [1] "28 PCs have eigenvalues larger than 2 times null upper bound."
## [1] "Storing 56 PCs."
# Calculate the Diffusion Map:
urd.obj <- calcDM(urd.obj, genes.use = rownames(urd.obj@logupx.data), knn = 100,
    sigma.use = "local", distance = "cosine")
## [1] "Using local sigma."
## Warning in DiffusionMap(data.use, sigma = sigma.use, k = knn, n_eigs =
## dcs.store, : You have 3000 genes. Consider passing e.g. n_pcs = 50 to speed up
## computation.
## Warning: 'as(<dsCMatrix>, "dsTMatrix")' is deprecated.
## Use 'as(., "TsparseMatrix")' instead.
## See help("Deprecated") and help("Matrix-deprecated").
plotDim(urd.obj, label = "curatedIdent", transitions.plot = 10000, plot.title = "knn100, sigma local, cosine (with transitions)",
    point.size = 0.5, alpha = 0.25)

plotDimArray(urd.obj, reduction.use = "dm", dims.to.plot = 1:16, outer.title = "Diffusion Map (Sigma local, knn100)",
    label = "curatedIdent", plot.title = "", legend = F)

Identify ec1E outliers

The ec1E cluster does not have a clear path from one specific cluster. Because URD works by performing random walks back from tips and determining when walks converge, it is unable to decifer multiple differentiation paths. We will therefore determine the most likely differentiation path of ec1E and may have to remove cells that represent the less likely differentiation paths. From the umap and transition probabilities it looks like ec1E may transdifferentiate from ec1C. Based on their biological locations. This makes sense, but there also appear to be other possible routes for ec1E to differentiate. We will use the diffusion map to quantify the most likely path to ec1E. We also want to determine the most likely path from “early” differentiating ec1C cells vs. “late” differentiated ec1C cells, so we will subcluster the data to determine the early and late ec1C cells.

# First, we will subcluster cells like we will to find tips, but to identify
# the 'early' and 'late' ec1C cells:
dir.create("Figures/ec1E_outliers")

# Subset the object to idents that may contain tip cells:
tip.idents <- c("en1", "en2", "en3", "ec2", "ec3A", "ec3B", "ec3C", "ec4A", "ec5",
    "ec1C", "ec1E", "ec1B", "ec1D")

# subset urd
subset.obj <- urdSubset(urd.obj, cells.keep = cellsInCluster(urd.obj, clustering = "curatedIdent",
    cluster = tip.idents))

# perform louvain clustering
set.seed(1212)
subset.obj <- graphClustering(subset.obj, do.jaccard = T, method = "Infomap", num.nn = c(120))
set.seed(1212)

plotDim(subset.obj, label = "Infomap-120", label.clusters = T, legend = F)

# Make a list of the ec1C subclusters: ec1C 'early' cells = '24', ec1C 'late'
# cells = '20'
ec1C.cells <- list(early = rownames(subset.obj@group.ids[subset.obj@group.ids$curatedIdent ==
    "ec1C" & subset.obj@group.ids$`Infomap-120` == "24", ]), late = rownames(subset.obj@group.ids[subset.obj@group.ids$curatedIdent ==
    "ec1C" & subset.obj@group.ids$`Infomap-120` == "20", ]))
# add any other ec1C cells that are not a part of clusters '25' or '20'
ec1C.cells$other <- setdiff(rownames(urd.obj@group.ids[urd.obj@group.ids$curatedIdent ==
    "ec1C", ]), unlist(ec1C.cells))

# add ec1C clustering information to the object:
urd.obj@group.ids$idents.split.ec1C <- as.character(urd.obj@group.ids$curatedIdent)
urd.obj@group.ids[ec1C.cells$early, "idents.split.ec1C"] <- "ec1C-early"
urd.obj@group.ids[ec1C.cells$late, "idents.split.ec1C"] <- "ec1C-late"
urd.obj@group.ids[ec1C.cells$other, "idents.split.ec1C"] <- "ec1C-other"

plotDim(urd.obj, label = "idents.split.ec1C", label.clusters = T, legend = F)

table(urd.obj@group.ids$idents.split.ec1C)
## 
## ec1 precursor          ec1A          ec1B    ec1C-early     ec1C-late 
##           596          1080          1799           307           353 
##    ec1C-other          ec1D          ec1E           ec2 ec2 precursor 
##             1           501           237          2332           381 
## ec3 precursor          ec3A          ec3B          ec3C          ec4A 
##           358           511           992          1112          3343 
##          ec4B           ec5  en precursor           en1           en2 
##           701          1365           243           920          2095 
##           en3           ISC       progen1       progen2 
##          1265           716           953          1142
# one cell doesn't fall into either ec1C-early or late, should probably be
# removed

To identify the most likely path to ec1E, we will take the mean of transitions between each ec1E cell and cells in each cluster. This shows that the highest transition probabilities (unsurprisingly) are between ec1E and other ec1E cells. However, the next highest probabilities are between ec1E and “late” ec1C cells. We therefore believe that the most likely path to ec1E cells is through trans-differentiation of ec1C. However, we want to recognize that based on this quantification, ec1E may also arrise independently from ec1 precursors, and/or trans-differentiate from ec1B.

# Sum the transition probabilites to/from ec1E cells and other clusters.

# Calculate transition probabilities:
tm <- floodBuildTM(urd.obj)

# get colmeans for ec1E cells to cells from each other population:
tm.ec1E <- tm[, rownames(urd.obj@group.ids[urd.obj@group.ids$idents.split.ec1C ==
    "ec1E", ])]
# remove that one weird ec1C-other cell since colMeans() won't work for one
# cell.
tm.ec1E <- tm.ec1E[setdiff(rownames(tm.ec1E), ec1C.cells$other), ]
clusters.test <- setdiff(unique(urd.obj@group.ids$idents.split.ec1C), "ec1C-other")


# for each celltype, get colmeans of ec1E cells to all cells in that cluster in
# tm.ec1E:
ec1E.transition.colmeans <- lapply(clusters.test, function(cluster) {
    cells.compare <- rownames(urd.obj@group.ids[urd.obj@group.ids$idents.split.ec1C ==
        cluster, ])
    return(colMeans(tm.ec1E[cells.compare, ]))
})
ec1E.transition.colmeans <- Reduce(rbind, ec1E.transition.colmeans)
rownames(ec1E.transition.colmeans) <- clusters.test

# convert to data.frame for plotting:
ec1E.transition.colmeans.df <- reshape2::melt(ec1E.transition.colmeans)
colnames(ec1E.transition.colmeans.df) <- c("curatedIdent", "Cell", "sum.tm")
# order clusters based on the mean transitions from ec1E:
plot.ident.order <- sapply(unique(as.character(ec1E.transition.colmeans.df$curatedIdent)),
    function(x) mean(ec1E.transition.colmeans.df[ec1E.transition.colmeans.df$curatedIdent ==
        x, "sum.tm"]))
ec1E.transition.colmeans.df$curatedIdent <- factor(ec1E.transition.colmeans.df$curatedIdent,
    levels = names(plot.ident.order[order(plot.ident.order, decreasing = T)]))

# plot Violin plot of transition probability sums for each cell:
ggplot(ec1E.transition.colmeans.df, mapping = aes(x = curatedIdent, y = sum.tm)) +
    geom_violin(aes(fill = curatedIdent), scale = "width") + geom_jitter(size = 0.5,
    alpha = 0.5, width = 0.33) + scale_fill_manual(values = curated.ident.colors) +
    theme_bw() + guides(fill = "none") + labs(x = NULL, y = "Mean Transition Probabilities",
    title = "Mean Transition Probabilities from ec1E to each curatedIdent") + theme(plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

We are going to remove ec1E cells that have high transition probabilities to clusters other than ec1E or ec1C using a threshold of the mean probability of 2e-05 in clusters other than ec1E or ec1C. We can plot these cells based which cluster (other than ec1E or ec1C) they have the highest transitions to.

# get outliers:
ec1E.outliers <- unique(ec1E.transition.colmeans.df[!(ec1E.transition.colmeans.df$curatedIdent %in%
    c("ec1E", "ec1C-early", "ec1C-late")) & ec1E.transition.colmeans.df$sum.tm >
    2e-05, "Cell"])
# unfactor so that sapply returns named character with cells as names in the
# next line:
ec1E.outliers <- as.character(ec1E.outliers)

# get the curatedIdent with highest transition probablity sum for each cell:
ec1E.outliers <- sapply(ec1E.outliers, function(cell) {
    df <- ec1E.transition.colmeans.df[!(ec1E.transition.colmeans.df$curatedIdent %in%
        c("ec1E", "ec1C-early", "ec1C-late")) & ec1E.transition.colmeans.df$Cell ==
        cell, ]
    df <- df[order(df$sum.tm, decreasing = T), ]
    return(df[1, "curatedIdent"])
})
cells.remove.list$ec1E.outliers <- ec1E.outliers

# add to urd obj and plot:
urd.obj@group.ids[names(ec1E.outliers), "ec1E.oultiers.max.transition"] <- ec1E.outliers

# Plot the ec1E cells with high transitions outside of ec1E/ec1C: cells will be
# colored by the next highest cluster that that cell has transitions to.
plotDim(urd.obj, "ec1E.oultiers.max.transition", discrete.colors = curated.ident.colors,
    alpha = 0.75, point.size = 1.5, plot.title = "ec1E oultiers cluster with highest transition probabilites") +
    scale_color_manual(values = curated.ident.colors, na.value = "#cecece")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We will also remove a few cells from other clusters that make abnormally high connections to the ec1E cells that remain. These are likely doublets that were not captured in the NMF doublet identification.

ec1E.transitions.rowmeans <- tm[rownames(urd.obj@group.ids[!urd.obj@group.ids$curatedIdent %in%
    c("ec1E", "ec1C"), ]), rownames(urd.obj@group.ids[urd.obj@group.ids$curatedIdent ==
    "ec1E", ])]
ec1E.transitions.rowmeans <- ec1E.transitions.rowmeans[, setdiff(colnames(ec1E.transitions.rowmeans),
    names(ec1E.outliers))]

# make a dataframe with mean transitions to ec1E for each cell and the cluster
# that that cell belongs to:
ec1E.transitions.rowmeans <- rowMeans(ec1E.transitions.rowmeans)
ec1E.transitions.rowmeans <- data.frame(ec1E.transitions.rowmeans)
ec1E.transitions.rowmeans$curatedIdent <- urd.obj@group.ids[rownames(ec1E.transitions.rowmeans),
    "curatedIdent"]

# plot the transitions to ec1E:
ggplot(data = ec1E.transitions.rowmeans, mapping = aes(x = curatedIdent, y = ec1E.transitions.rowmeans)) +
    geom_violin(scale = "width", aes(fill = curatedIdent)) + geom_jitter(alpha = 0.5,
    width = 0.33, aes(color = curatedIdent)) + scale_fill_manual(values = curated.ident.colors) +
    scale_color_manual(values = curated.ident.colors) + theme_bw() + guides(fill = "none",
    color = "none") + labs(x = NULL, y = "Mean Transition Probabilities", title = "Mean Transition Probabilities to ec1E from each curatedIdent") +
    theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45,
        hjust = 1, vjust = 1))

# Remove cells outside of ec1E and ec1C with high transitions to ec1E:
ec1E.outliers.rm <- rownames(ec1E.transitions.rowmeans[ec1E.transitions.rowmeans$ec1E.transitions.rowmeans >
    5e-05, ])
cells.remove.list$other.cells.with.transitions.to.ec1E <- ec1E.outliers.rm

urd.obj@group.ids[ec1E.outliers.rm, "ec1E.other.cluster.cells.remove"] <- urd.obj@group.ids[ec1E.outliers.rm,
    "curatedIdent"]

# Plot the cells that will be removed, colored by the cluster that they belong
# to:
plotDim(urd.obj, "ec1E.other.cluster.cells.remove", discrete.colors = curated.ident.colors,
    alpha = 0.75, point.size = 1.5, plot.title = "Other cluster cells with high transitions to ec1E") +
    scale_color_manual(values = curated.ident.colors, na.value = "#cecece")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# cells to remove include the ec1E outliers as well as cells with high
# transitions to ec1E:
ec1E.outliers.rm <- c(ec1E.outliers.rm, names(ec1E.outliers))

Finally, there are a few ec1E cells that have no ec1E NMF module expression but express high ec1B module expression. These may represent doublets or cells that are transdifferentiating from ec1B. We will remove these cells as they will confound pseudotime analysis without transitions between ec1E and ec1B.

# Find the cells that have high ec1B module expression:
X4.module.cells <- urd.obj@nmf.c1[, "X4"]
X4.module.cells <- X4.module.cells[X4.module.cells > 0.25]
X4.module.cells <- names(X4.module.cells)
ec1e.cells.high.X4 <- intersect(rownames(urd.obj@group.ids[urd.obj@group.ids$curatedIdent ==
    "ec1E", ]), X4.module.cells)
cells.remove.list$ec1e.cells.high.X4 <- ec1e.cells.high.X4

# plot ec1e.cells.high.X4:
urd.obj <- groupFromCells(urd.obj, group.id = "ec1e.cells.high.X4", cells = ec1e.cells.high.X4)
plotDimHighlight(urd.obj, "ec1e.cells.high.X4", "True", plot.title = "ec1E cells with high ec1B NMF module",
    highlight.color = "#00BFC4")

# All cells to remove based on: 1: high transitions to clusters other than
# ec1E/ec1C, 2: high transitions from outside cells to ec1E, 3: high expression
# of the ec1B specific NMF module
ec1E.outliers.rm <- unique(c(ec1E.outliers.rm, ec1e.cells.high.X4))

urd.obj <- groupFromCells(urd.obj, group.id = "ec1E.cells.remove", cells = ec1E.outliers.rm)
plotDimHighlight(urd.obj, "ec1E.cells.remove", "True", plot.title = "ec1E cells to remove",
    highlight.color = "#00BFC4")

Re-caculate DM after removing ec1E outliers

Here, we remove the ec1E outliers and re-calculate variable genes, PCA, and the Diffusion Matrix.

subset.obj <- urdSubset(urd.obj, cells.keep = setdiff(colnames(urd.obj@logupx.data),
    ec1E.outliers.rm))
# also subset out the 1 ec1C-other cell:
subset.obj <- urdSubset(subset.obj, cells.keep = setdiff(colnames(subset.obj@logupx.data),
    rownames(subset.obj@group.ids[subset.obj@group.ids$idents.split.ec1C == "ec1C-other",
        ])))

# Plot transitions to and from ec1E before removing these cells:
plotDim(urd.obj, "curatedIdent", point.size = 0.5, alpha = 0.1, transitions.df = edgesFromDM(urd.obj,
    cells = cellsInCluster(urd.obj, "curatedIdent", "ec1E"), include.connected.cells = T),
    legend = F, plot.title = "All ec1E transitions (before outlier removal)")

## Plot transitions to and from ec1E after removing these cells:
plotDim(subset.obj, "curatedIdent", point.size = 0.5, alpha = 0.1, transitions.df = edgesFromDM(subset.obj,
    cells = cellsInCluster(subset.obj, "curatedIdent", "ec1E"), include.connected.cells = T),
    legend = F, plot.title = "All ec1E transitions (after outlier removal)")

# subset the object, rerun var.genes, calcPCA, calcDM:

urd.obj <- subset.obj

var.genes <- findVariableGenes(object = urd.obj, set.object.var.genes = F, diffCV.cutoff = 0.45,
    do.plot = T)
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.8 GiB
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 3.8 GiB
## Warning in xy.coords(x, y, xlabel, ylabel, log): 402 x values <= 0 omitted from
## logarithmic plot

urd.obj@var.genes <- var.genes
urd.obj <- calcPCA(urd.obj, genes.use = rownames(urd.obj@logupx.data), mp.factor = 2)
## [1] "2025-09-02 12:08:00.685069: Centering and scaling data."
## [1] "2025-09-02 12:08:05.925419: Removing genes with no variation."
## [1] "2025-09-02 12:08:07.680518: Calculating PCA."
## [1] "2025-09-02 12:08:59.834608: Estimating significant PCs."
## [1] "Marchenko-Pastur eigenvalue null upper bound: 1.8483367555819"
## [1] "28 PCs have eigenvalues larger than 2 times null upper bound."
## [1] "Storing 56 PCs."
urd.obj <- calcDM(urd.obj, genes.use = rownames(urd.obj@logupx.data), knn = 100,
    sigma.use = "local", distance = "cosine")
## [1] "Using local sigma."
## Warning in DiffusionMap(data.use, sigma = sigma.use, k = knn, n_eigs =
## dcs.store, : You have 3000 genes. Consider passing e.g. n_pcs = 50 to speed up
## computation.

Trim ec1E transitions

There are still some transitions to/from ec1E and “early” ec1C cells. This is likely due to early ec1C cells having low ec1C module expression, similar to late trans-differentiating ec1C cells. Because this will cause pseudotime to “skip” the late ec1C cells, it will not properly represent ec1E as a tip. We will therefore set transition probabilities between ec1E and anything other than late ec1C cells to 0 before calculating pseudotime.

# get the cells from ec1E and late ec1C that we will keep transitions between:
dm.cells.keep <- list(ec1E = rownames(urd.obj@group.ids[urd.obj@group.ids$idents.split.ec1C ==
    "ec1E", ]), ec1C.late = rownames(urd.obj@group.ids[urd.obj@group.ids$idents.split.ec1C ==
    "ec1C-late", ]))

# set transition matrix rows and columns from ec1E to anything other than ec1E
# or ec1C-late to 0
urd.obj@dm@transitions[dm.cells.keep$ec1E, setdiff(colnames(urd.obj@dm@transitions),
    unlist(dm.cells.keep))] <- 0
urd.obj@dm@transitions[setdiff(rownames(urd.obj@dm@transitions), unlist(dm.cells.keep)),
    dm.cells.keep$ec1E] <- 0

# plot the transitions to and from ec1E after removing outlier cells and
# trimming ec1E transitions:
plotDim(urd.obj, "curatedIdent", point.size = 0.5, alpha = 0.1, transitions.df = edgesFromDM(urd.obj,
    cells = cellsInCluster(urd.obj, "curatedIdent", "ec1E"), include.connected.cells = T),
    legend = F, plot.title = "All ec1E transitions (after trimming transitions)")

# Plot all transitions across the whole object:
plotDim(urd.obj, "curatedIdent", point.size = 0.5, alpha = 0.1, transitions.plot = 10000,
    legend = T, plot.title = "All transitions (10,000 transitions plotted)")

Calculate pseudotime

We next calculated pseudotime using the interstitial stem cells (ISCs), as determined by expression of Hydra ISC marker (G002332) (Siebert et al., 2019), as the “root”, or starting point of the tree.

set.seed(1212)
# We next calculated pseudotime using the interstitial stem cells, as
# determined by expression of Hydra ISC marker (G002332) (Siebert et al.,
# 2019), as the “root”, or starting point of the tree.
dir.create("Figures/Calc_pseudotime")

# Define root cells as anything in the I-cell cluster
root.cells <- cellsInCluster(urd.obj, "curatedIdent", c("ISC"))
urd.obj <- groupFromCells(urd.obj, "root", root.cells)

# Plot the root cells
plotDim(urd.obj, "root", plot.title = "Root cells")

floods <- floodPseudotime(urd.obj, root.cells = root.cells, n = 50, minimum.cells.flooded = 2,
    verbose = T)
## [1] "Starting flood number 1"
## [1] "Flooding, step 10 - 11.4%: 2638 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.5%: 14036 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.4%: 22828 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23204 of 23208 cells visited."
## [1] "Starting flood number 2"
## [1] "Flooding, step 10 - 12.1%: 2808 of 23208 cells visited."
## [1] "Flooding, step 20 - 68.9%: 15979 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.9%: 22942 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23179 of 23208 cells visited."
## [1] "Starting flood number 3"
## [1] "Flooding, step 10 - 11.6%: 2698 of 23208 cells visited."
## [1] "Flooding, step 20 - 61.7%: 14308 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.5%: 22637 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23188 of 23208 cells visited."
## [1] "Starting flood number 4"
## [1] "Flooding, step 10 - 11.9%: 2758 of 23208 cells visited."
## [1] "Flooding, step 20 - 54.8%: 12720 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.6%: 22427 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23191 of 23208 cells visited."
## [1] "Starting flood number 5"
## [1] "Flooding, step 10 - 12.8%: 2973 of 23208 cells visited."
## [1] "Flooding, step 20 - 62.3%: 14452 of 23208 cells visited."
## [1] "Flooding, step 30 - 99%: 22966 of 23208 cells visited."
## [1] "Starting flood number 6"
## [1] "Flooding, step 10 - 13%: 3021 of 23208 cells visited."
## [1] "Flooding, step 20 - 66.2%: 15359 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.8%: 22932 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23206 of 23208 cells visited."
## [1] "Starting flood number 7"
## [1] "Flooding, step 10 - 12.5%: 2896 of 23208 cells visited."
## [1] "Flooding, step 20 - 63.5%: 14742 of 23208 cells visited."
## [1] "Flooding, step 30 - 98%: 22734 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23160 of 23208 cells visited."
## [1] "Starting flood number 8"
## [1] "Flooding, step 10 - 11.8%: 2734 of 23208 cells visited."
## [1] "Flooding, step 20 - 58.6%: 13594 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.1%: 22775 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23199 of 23208 cells visited."
## [1] "Starting flood number 9"
## [1] "Flooding, step 10 - 12.1%: 2811 of 23208 cells visited."
## [1] "Flooding, step 20 - 54.2%: 12585 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22660 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23202 of 23208 cells visited."
## [1] "Starting flood number 10"
## [1] "Flooding, step 10 - 11.7%: 2725 of 23208 cells visited."
## [1] "Flooding, step 20 - 57.1%: 13245 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.2%: 22318 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23189 of 23208 cells visited."
## [1] "Starting flood number 11"
## [1] "Flooding, step 10 - 12.9%: 3002 of 23208 cells visited."
## [1] "Flooding, step 20 - 61.6%: 14303 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.5%: 22630 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23184 of 23208 cells visited."
## [1] "Starting flood number 12"
## [1] "Flooding, step 10 - 12.1%: 2798 of 23208 cells visited."
## [1] "Flooding, step 20 - 61%: 14161 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.9%: 22728 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23199 of 23208 cells visited."
## [1] "Starting flood number 13"
## [1] "Flooding, step 10 - 12.3%: 2853 of 23208 cells visited."
## [1] "Flooding, step 20 - 57.3%: 13289 of 23208 cells visited."
## [1] "Flooding, step 30 - 98%: 22739 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23202 of 23208 cells visited."
## [1] "Starting flood number 14"
## [1] "Flooding, step 10 - 11.8%: 2734 of 23208 cells visited."
## [1] "Flooding, step 20 - 59.4%: 13779 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.2%: 22568 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23169 of 23208 cells visited."
## [1] "Starting flood number 15"
## [1] "Flooding, step 10 - 12.1%: 2813 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.2%: 13968 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.1%: 22541 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23204 of 23208 cells visited."
## [1] "Starting flood number 16"
## [1] "Flooding, step 10 - 12.7%: 2955 of 23208 cells visited."
## [1] "Flooding, step 20 - 61.1%: 14173 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.3%: 22809 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23204 of 23208 cells visited."
## [1] "Starting flood number 17"
## [1] "Flooding, step 10 - 12.5%: 2907 of 23208 cells visited."
## [1] "Flooding, step 20 - 67.7%: 15704 of 23208 cells visited."
## [1] "Flooding, step 30 - 99%: 22966 of 23208 cells visited."
## [1] "Starting flood number 18"
## [1] "Flooding, step 10 - 12.1%: 2807 of 23208 cells visited."
## [1] "Flooding, step 20 - 59.8%: 13879 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.6%: 22421 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.7%: 23131 of 23208 cells visited."
## [1] "Starting flood number 19"
## [1] "Flooding, step 10 - 11.6%: 2682 of 23208 cells visited."
## [1] "Flooding, step 20 - 58%: 13454 of 23208 cells visited."
## [1] "Flooding, step 30 - 98%: 22741 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23206 of 23208 cells visited."
## [1] "Starting flood number 20"
## [1] "Flooding, step 10 - 11.5%: 2661 of 23208 cells visited."
## [1] "Flooding, step 20 - 59.3%: 13757 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.5%: 22860 of 23208 cells visited."
## [1] "Starting flood number 21"
## [1] "Flooding, step 10 - 12.1%: 2812 of 23208 cells visited."
## [1] "Flooding, step 20 - 58.8%: 13648 of 23208 cells visited."
## [1] "Flooding, step 30 - 98%: 22737 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23191 of 23208 cells visited."
## [1] "Starting flood number 22"
## [1] "Flooding, step 10 - 11.9%: 2757 of 23208 cells visited."
## [1] "Flooding, step 20 - 61.3%: 14222 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.3%: 22803 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23191 of 23208 cells visited."
## [1] "Starting flood number 23"
## [1] "Flooding, step 10 - 12%: 2789 of 23208 cells visited."
## [1] "Flooding, step 20 - 55.9%: 12980 of 23208 cells visited."
## [1] "Flooding, step 30 - 95.7%: 22201 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23194 of 23208 cells visited."
## [1] "Starting flood number 24"
## [1] "Flooding, step 10 - 12.2%: 2832 of 23208 cells visited."
## [1] "Flooding, step 20 - 55.5%: 12886 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22642 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23197 of 23208 cells visited."
## [1] "Starting flood number 25"
## [1] "Flooding, step 10 - 11.3%: 2626 of 23208 cells visited."
## [1] "Flooding, step 20 - 56.9%: 13210 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.8%: 22709 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23203 of 23208 cells visited."
## [1] "Starting flood number 26"
## [1] "Flooding, step 10 - 12.4%: 2871 of 23208 cells visited."
## [1] "Flooding, step 20 - 64.4%: 14949 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.5%: 22619 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23190 of 23208 cells visited."
## [1] "Starting flood number 27"
## [1] "Flooding, step 10 - 11.9%: 2769 of 23208 cells visited."
## [1] "Flooding, step 20 - 57.4%: 13323 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.5%: 22636 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23198 of 23208 cells visited."
## [1] "Starting flood number 28"
## [1] "Flooding, step 10 - 12.9%: 2990 of 23208 cells visited."
## [1] "Flooding, step 20 - 64.1%: 14872 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.1%: 22778 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23189 of 23208 cells visited."
## [1] "Starting flood number 29"
## [1] "Flooding, step 10 - 12.1%: 2817 of 23208 cells visited."
## [1] "Flooding, step 20 - 59.1%: 13707 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.7%: 22447 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23151 of 23208 cells visited."
## [1] "Starting flood number 30"
## [1] "Flooding, step 10 - 11.9%: 2767 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.7%: 14076 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22649 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23173 of 23208 cells visited."
## [1] "Starting flood number 31"
## [1] "Flooding, step 10 - 12.1%: 2806 of 23208 cells visited."
## [1] "Flooding, step 20 - 64.7%: 15010 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.3%: 22818 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23204 of 23208 cells visited."
## [1] "Starting flood number 32"
## [1] "Flooding, step 10 - 12%: 2790 of 23208 cells visited."
## [1] "Flooding, step 20 - 58.6%: 13599 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22645 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23193 of 23208 cells visited."
## [1] "Starting flood number 33"
## [1] "Flooding, step 10 - 11.7%: 2723 of 23208 cells visited."
## [1] "Flooding, step 20 - 55.5%: 12880 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.4%: 22835 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23205 of 23208 cells visited."
## [1] "Starting flood number 34"
## [1] "Flooding, step 10 - 12.1%: 2797 of 23208 cells visited."
## [1] "Flooding, step 20 - 62.8%: 14567 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.6%: 22886 of 23208 cells visited."
## [1] "Starting flood number 35"
## [1] "Flooding, step 10 - 12.4%: 2887 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.2%: 13961 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22656 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23192 of 23208 cells visited."
## [1] "Starting flood number 36"
## [1] "Flooding, step 10 - 12.8%: 2979 of 23208 cells visited."
## [1] "Flooding, step 20 - 68.7%: 15949 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.9%: 22943 of 23208 cells visited."
## [1] "Starting flood number 37"
## [1] "Flooding, step 10 - 11.2%: 2609 of 23208 cells visited."
## [1] "Flooding, step 20 - 56.5%: 13117 of 23208 cells visited."
## [1] "Flooding, step 30 - 97%: 22505 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23168 of 23208 cells visited."
## [1] "Starting flood number 38"
## [1] "Flooding, step 10 - 11.5%: 2669 of 23208 cells visited."
## [1] "Flooding, step 20 - 56.2%: 13038 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.4%: 22373 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23200 of 23208 cells visited."
## [1] "Starting flood number 39"
## [1] "Flooding, step 10 - 12.8%: 2962 of 23208 cells visited."
## [1] "Flooding, step 20 - 68%: 15791 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.7%: 22907 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23204 of 23208 cells visited."
## [1] "Starting flood number 40"
## [1] "Flooding, step 10 - 13.8%: 3193 of 23208 cells visited."
## [1] "Flooding, step 20 - 71.9%: 16689 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.4%: 22831 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23203 of 23208 cells visited."
## [1] "Starting flood number 41"
## [1] "Flooding, step 10 - 11.9%: 2773 of 23208 cells visited."
## [1] "Flooding, step 20 - 65.1%: 15119 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.3%: 22585 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23176 of 23208 cells visited."
## [1] "Starting flood number 42"
## [1] "Flooding, step 10 - 12%: 2782 of 23208 cells visited."
## [1] "Flooding, step 20 - 64.2%: 14899 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.7%: 22684 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23199 of 23208 cells visited."
## [1] "Starting flood number 43"
## [1] "Flooding, step 10 - 12.1%: 2804 of 23208 cells visited."
## [1] "Flooding, step 20 - 61.2%: 14194 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.3%: 22584 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23201 of 23208 cells visited."
## [1] "Starting flood number 44"
## [1] "Flooding, step 10 - 11.1%: 2582 of 23208 cells visited."
## [1] "Flooding, step 20 - 52.5%: 12175 of 23208 cells visited."
## [1] "Flooding, step 30 - 95.6%: 22189 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.7%: 23149 of 23208 cells visited."
## [1] "Starting flood number 45"
## [1] "Flooding, step 10 - 12.9%: 2987 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.2%: 13960 of 23208 cells visited."
## [1] "Flooding, step 30 - 96.9%: 22496 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23202 of 23208 cells visited."
## [1] "Starting flood number 46"
## [1] "Flooding, step 10 - 12.3%: 2865 of 23208 cells visited."
## [1] "Flooding, step 20 - 57.3%: 13289 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22647 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23205 of 23208 cells visited."
## [1] "Starting flood number 47"
## [1] "Flooding, step 10 - 12.5%: 2895 of 23208 cells visited."
## [1] "Flooding, step 20 - 62.5%: 14498 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.5%: 22869 of 23208 cells visited."
## [1] "Starting flood number 48"
## [1] "Flooding, step 10 - 11.7%: 2716 of 23208 cells visited."
## [1] "Flooding, step 20 - 56.5%: 13120 of 23208 cells visited."
## [1] "Flooding, step 30 - 95.9%: 22249 of 23208 cells visited."
## [1] "Flooding, step 40 - 100%: 23202 of 23208 cells visited."
## [1] "Starting flood number 49"
## [1] "Flooding, step 10 - 12.4%: 2877 of 23208 cells visited."
## [1] "Flooding, step 20 - 60.8%: 14108 of 23208 cells visited."
## [1] "Flooding, step 30 - 97.6%: 22658 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.8%: 23172 of 23208 cells visited."
## [1] "Starting flood number 50"
## [1] "Flooding, step 10 - 13%: 3013 of 23208 cells visited."
## [1] "Flooding, step 20 - 65%: 15094 of 23208 cells visited."
## [1] "Flooding, step 30 - 98.1%: 22772 of 23208 cells visited."
## [1] "Flooding, step 40 - 99.9%: 23182 of 23208 cells visited."
# The we process the simulations into a pseudotime
urd.obj <- floodPseudotimeProcess(urd.obj, floods, floods.name = "pseudotime")

# We can make sure that enough simulations have been performed by looking at
# the change in cell pseudotime as more simulations are added. Here, we can see
# that an asymptote was reached around 40 simulations, so 50 was enough.
pseudotimePlotStabilityOverall(urd.obj)

# We can also plot pseudotime on the UMAP
plotDim(urd.obj, "pseudotime", plot.title = "Pseudotime")

# Plotting distribution of pseudotime per cluster:
plotDists(urd.obj, "pseudotime", "curatedIdent", plot.title = "Pseudotime by cluster")

Determine Tip cells

To perform random walks backwards in pseudotime, we need to determine which cells we will use as tips. Terminal neural populations were chosen from Infomap-Jaccard community detection clustering and tip clusters were selected based on (1) their late pseudotime as assigned by URD and (2) several differentially expressed genes.

dir.create("Figures/choosing_tips")

# Define the idents where we want to find tips.
tip.idents <- c("en1", "en2", "en3", "ec2", "ec3A", "ec3B", "ec3C", "ec4A", "ec5",
    "ec1C", "ec1E", "ec1B", "ec1D", "ec1A")

# subset urd to those idents, cluster, and find subclusters to use as tips.
subset.obj <- urdSubset(urd.obj, cells.keep = cellsInCluster(urd.obj, clustering = "curatedIdent",
    cluster = tip.idents))

# perform louvain clustering
set.seed(1212)
subset.obj <- graphClustering(subset.obj, do.jaccard = T, method = "Infomap", num.nn = c(120))

# plot clusters to determine which to use for random walks
plotDim(subset.obj, "Infomap-120", label.clusters = T, legend = F)

# get subclusters for tips: some may be combined later.
tip.sub.clusters <- list(ec4A = c("13", "46", "5", "42"), ec2 = c("1", "44"), ec1B = c("2",
    "25", "11"), ec1E = "45", ec1C = c("20"), ec1A = c("22", "4"), ec1D = "14", ec5 = c("9",
    "16"), en3 = c("39", "6", "40", "37"), en1 = c("12", "24", "47"), en2 = c("10",
    "30", "23", "18"), ec3A = c("29", "41"), ec3B = c("8", "36"), ec3C = c("27",
    "7"))

# add tip clusters to full subset:
cluster.tip.cells <- cellsInCluster(subset.obj, "Infomap-120", unlist(tip.sub.clusters))
urd.obj@group.ids[cluster.tip.cells, "tip.clusters"] <- subset.obj@group.ids[cluster.tip.cells,
    "Infomap-120"]

# plot tip cells:
plotDim(urd.obj, "tip.clusters", label.clusters = T, legend = F, plot.title = "Tip Clusters to test for neuron specific tree")

Perform random walks

Biased random walks were performed to determine the cells visited from each terminal population in the data. Since Hydra are constantly replenishing their tissues in a homeostatic manner, cell density along developmental processes varies widely, with a large number of transcriptionally similar differentiated cells and much smaller numbers of cells in transition. Thus, we used a larger max.cells.back value in an attempt to force the random walks to visit, and not bypass, cells of similar pseudotime when trajectory reconstruction was proceeding through regions of high cell density.

dir.create("Figures/Random_walks")

# Bias the transition probabilities by cellular pseudotime
pt.logistic <- pseudotimeDetermineLogistic(urd.obj, pseudotime = "pseudotime", optimal.cells.forward = 0,
    max.cells.back = 200, pseudotime.direction = "<")

## [1] "Mean pseudotime back (~200 cells) 0.00666371304794678"
## [1] "Chance of accepted move to equal pseudotime is 0.99009900990099"
## [1] "Mean pseudotime forward (~0 cells) 0"
tm.biased <- as.matrix(pseudotimeWeightTransitionMatrix(urd.obj, pseudotime = "pseudotime",
    logistic.params = pt.logistic, pseudotime.direction = "<"))

# Simulate the biased random walks from each tip
walks <- simulateRandomWalksFromTips(urd.obj, tip.group.id = "tip.clusters", root.cells = root.cells,
    transition.matrix = tm.biased, n.per.tip = 50000, root.visits = 1, verbose = T)
## [1] "2025-09-02 13:58:14.547726 - Starting random walks from tip 1"
## [1] "Starting walk 5000 from cell AAATGGATCGGTGCAC-1_3"
## [1] "Starting walk 10000 from cell ATTACCTAGTACCATC-1_2"
## [1] "Starting walk 15000 from cell CAGAGCCGTGGCCACT-1_2"
## [1] "Starting walk 20000 from cell CATTGCCCATTCTGTT-1_3"
## [1] "Starting walk 25000 from cell AGGAGGTCATAATCCG-1_3"
## [1] "Starting walk 30000 from cell CATCAAGTCGGCTGTG-1_3"
## [1] "Starting walk 35000 from cell GTTGCTCTCCCTGGTT-1_3"
## [1] "Starting walk 40000 from cell GATTCGATCCGATGCG-1_3"
## [1] "Starting walk 45000 from cell TGTGAGTGTGCCTATA-1_3"
## [1] "Starting walk 50000 from cell TCTATCAGTAACCCGC-1_2"
## [1] "2025-09-02 14:09:40.577752 - Starting random walks from tip 10"
## [1] "Starting walk 5000 from cell CTCTCAGCATATCGGT-1_4"
## [1] "Starting walk 10000 from cell CACGGGTAGCAATTAG-1_2"
## [1] "Starting walk 15000 from cell TTTCACAGTGTTGACT-1_1"
## [1] "Starting walk 20000 from cell ATATCCTCACCTTCGT-1_2"
## [1] "Starting walk 25000 from cell GACTATGGTAGTTAGA-1_4"
## [1] "Starting walk 30000 from cell AACCATGCAAGGTCGA-1_3"
## [1] "Starting walk 35000 from cell AAGTCGTCATCATTTC-1_1"
## [1] "Starting walk 40000 from cell GAGTGTTGTTCGGTTA-1_1"
## [1] "Starting walk 45000 from cell GGGAGTATCTCATTGT-1_2"
## [1] "Starting walk 50000 from cell GGTCACGGTATACGGG-1_4"
## [1] "2025-09-02 14:19:35.977032 - Starting random walks from tip 11"
## [1] "Starting walk 5000 from cell AGCGCCAAGCGTCAAG-1_4"
## [1] "Starting walk 10000 from cell CTTCCGAGTTCCGTTC-1_4"
## [1] "Starting walk 15000 from cell AGGAAATCAGCGGTCT-1_4"
## [1] "Starting walk 20000 from cell TTATTGCTCAGCTGAT-1_4"
## [1] "Starting walk 25000 from cell AAGAACATCACTGCTC-1_1"
## [1] "Starting walk 30000 from cell CGAAGGAGTCTGTGTA-1_4"
## [1] "Starting walk 35000 from cell AGGTTACCACGGCTAC-1_1"
## [1] "Starting walk 40000 from cell TTCGCTGAGGTCGTGA-1_4"
## [1] "Starting walk 45000 from cell TTAGGGTGTGGAACAC-1_4"
## [1] "Starting walk 50000 from cell CTGCGAGCACACACTA-1_3"
## [1] "2025-09-02 14:30:43.63662 - Starting random walks from tip 12"
## [1] "Starting walk 5000 from cell CGAGGCTGTAGTCGGA-1_3"
## [1] "Starting walk 10000 from cell TCTTGCGTCGACATCA-1_2"
## [1] "Starting walk 15000 from cell ATCCCTGAGGGTAATT-1_3"
## [1] "Starting walk 20000 from cell TGGGTTACAGTCAGCC-1_3"
## [1] "Starting walk 25000 from cell AGTACCATCCGTGTGG-1_1"
## [1] "Starting walk 30000 from cell TTTCACATCGTGTGAT-1_3"
## [1] "Starting walk 35000 from cell TCCCACACAGACCAGA-1_1"
## [1] "Starting walk 40000 from cell AAGACAATCACTGCTC-1_1"
## [1] "Starting walk 45000 from cell CAACGATAGCACTTTG-1_3"
## [1] "Starting walk 50000 from cell TCTGCCATCTGGTGGC-1_2"
## [1] "2025-09-02 14:39:57.876424 - Starting random walks from tip 13"
## [1] "Starting walk 5000 from cell CCCTTAGTCGTGCATA-1_2"
## [1] "Starting walk 10000 from cell CTGATCCTCATCACCC-1_1"
## [1] "Starting walk 15000 from cell GTAGAGGAGAGGACTC-1_1"
## [1] "Starting walk 20000 from cell GATCATGGTCCAAGAG-1_4"
## [1] "Starting walk 25000 from cell AGAACAAAGATAGCAT-1_4"
## [1] "Starting walk 30000 from cell CTTGATTCATACAGAA-1_4"
## [1] "Starting walk 35000 from cell GTAAGTCAGTTGGGAC-1_1"
## [1] "Starting walk 40000 from cell AGTACCATCTTGGAAC-1_4"
## [1] "Starting walk 45000 from cell AACGAAAAGGCACGAT-1_4"
## [1] "Starting walk 50000 from cell GTGCTTCGTGGTCCGT-1_4"
## [1] "2025-09-02 14:54:00.073237 - Starting random walks from tip 14"
## [1] "Starting walk 5000 from cell ATGTCTTCACCGGCTA-1_2"
## [1] "Starting walk 10000 from cell TGCACGGCATACGCAT-1_1"
## [1] "Starting walk 15000 from cell GTCACTCAGGTCGTAG-1_2"
## [1] "Starting walk 20000 from cell AGGACTTAGAAGTGTT-1_4"
## [1] "Starting walk 25000 from cell TACTTCACAGAGGACT-1_2"
## [1] "Starting walk 30000 from cell TGCTTCGTCAAGCCAT-1_4"
## [1] "Starting walk 35000 from cell TTCCAATCACACGTGC-1_4"
## [1] "Starting walk 40000 from cell ACGCACGAGTTGCTGT-1_3"
## [1] "Starting walk 45000 from cell TTTCATGGTGGGTATG-1_4"
## [1] "Starting walk 50000 from cell TGCACGGCATACGCAT-1_1"
## [1] "2025-09-02 15:02:46.741654 - Starting random walks from tip 16"
## [1] "Starting walk 5000 from cell TGCTGAAAGATACATG-1_4"
## [1] "Starting walk 10000 from cell GTTGCGGCATACGCAT-1_3"
## [1] "Starting walk 15000 from cell TCATGGACAGCCTACG-1_1"
## [1] "Starting walk 20000 from cell ATTGGGTAGCCAAGGT-1_2"
## [1] "Starting walk 25000 from cell ACTGATGCATCCTGTC-1_1"
## [1] "Starting walk 30000 from cell CCCGGAACAATGTCTG-1_4"
## [1] "Starting walk 35000 from cell GTCTTTAAGAATCTAG-1_2"
## [1] "Starting walk 40000 from cell CGTGTCTTCCACAGCG-1_2"
## [1] "Starting walk 45000 from cell GGGATGAAGCTGGAGT-1_2"
## [1] "Starting walk 50000 from cell TAGCACACAAAGAACT-1_2"
## [1] "2025-09-02 15:10:36.88822 - Starting random walks from tip 18"
## [1] "Starting walk 5000 from cell GAGAGGTTCCTACAAG-1_4"
## [1] "Starting walk 10000 from cell GAGAGGTTCCTCTTTC-1_4"
## [1] "Starting walk 15000 from cell TGCATGAAGTAACGTA-1_4"
## [1] "Starting walk 20000 from cell ACCTGTCTCATCCCGT-1_4"
## [1] "Starting walk 25000 from cell AGAGAGCTCTTTGCGC-1_3"
## [1] "Starting walk 30000 from cell GCACATACAGAACATA-1_4"
## [1] "Starting walk 35000 from cell TCATGCCTCTAGGAAA-1_3"
## [1] "Starting walk 40000 from cell CGTTGGGAGGGTTAAT-1_2"
## [1] "Starting walk 45000 from cell GGAGGTAGTCCGCAGT-1_3"
## [1] "Starting walk 50000 from cell GAATCACTCATGTCTT-1_1"
## [1] "2025-09-02 15:19:18.216047 - Starting random walks from tip 2"
## [1] "Starting walk 5000 from cell AGTACCAGTGAATTGA-1_1"
## [1] "Starting walk 10000 from cell ATAGGCTGTTGGCCTG-1_1"
## [1] "Starting walk 15000 from cell TCACAAGGTTTACGTG-1_1"
## [1] "Starting walk 20000 from cell AGCCACGGTACACTCA-1_4"
## [1] "Starting walk 25000 from cell TAACGACAGCTGAGTG-1_2"
## [1] "Starting walk 30000 from cell ACAAAGACAGCCCAGT-1_1"
## [1] "Starting walk 35000 from cell GAATCGTGTAACTGCT-1_2"
## [1] "Starting walk 40000 from cell AGACAGGGTCTACAAC-1_4"
## [1] "Starting walk 45000 from cell TCGTAGAGTCTGTTAG-1_1"
## [1] "Starting walk 50000 from cell CGATCGGGTGCCCACA-1_2"
## [1] "2025-09-02 15:30:16.500301 - Starting random walks from tip 20"
## [1] "Starting walk 5000 from cell AGTACTGTCTTGGTGA-1_4"
## [1] "Starting walk 10000 from cell GTTGTCCCACACGCCA-1_4"
## [1] "Starting walk 15000 from cell TGTTCCGAGCTCCCTT-1_3"
## [1] "Starting walk 20000 from cell GGCTGTGCAGCTCCTT-1_4"
## [1] "Starting walk 25000 from cell AGTACTGTCTTGGTGA-1_4"
## [1] "Starting walk 30000 from cell GTTCATTCATCAGTCA-1_2"
## [1] "Starting walk 35000 from cell TATCTGTAGCATCTTG-1_4"
## [1] "Starting walk 40000 from cell CGTCCATAGGTATAGT-1_4"
## [1] "Starting walk 45000 from cell CTCGAGGAGTCACGAG-1_3"
## [1] "Starting walk 50000 from cell ATCTTCAGTTTCCAAG-1_1"
## [1] "2025-09-02 15:48:00.419956 - Starting random walks from tip 22"
## [1] "Starting walk 5000 from cell CTGCGAGAGTGGTCAG-1_4"
## [1] "Starting walk 10000 from cell ATGTCCCCATTAAAGG-1_3"
## [1] "Starting walk 15000 from cell ACCAAACTCTGCATAG-1_3"
## [1] "Starting walk 20000 from cell CCCAACTGTTCACGAT-1_3"
## [1] "Starting walk 25000 from cell ACTGATGGTGCAACAG-1_1"
## [1] "Starting walk 30000 from cell GAAGCGAGTACTCGTA-1_2"
## [1] "Starting walk 35000 from cell CAATTTCGTGCAAGAC-1_4"
## [1] "Starting walk 40000 from cell CGATGCGGTTAGTTCG-1_1"
## [1] "Starting walk 45000 from cell CAGAGCCGTTTCGACA-1_4"
## [1] "Starting walk 50000 from cell CGAGGCTTCGGACTGC-1_4"
## [1] "2025-09-02 15:56:18.703443 - Starting random walks from tip 23"
## [1] "Starting walk 5000 from cell ACGGGTCAGCCATGCC-1_3"
## [1] "Starting walk 10000 from cell AAAGGGCAGGTAACTA-1_4"
## [1] "Starting walk 15000 from cell AAATGGATCGACCACG-1_4"
## [1] "Starting walk 20000 from cell GGTGTCGTCTTGAACG-1_4"
## [1] "Starting walk 25000 from cell CATACAGTCTGCGATA-1_4"
## [1] "Starting walk 30000 from cell ATCGGCGTCAGAGCAG-1_4"
## [1] "Starting walk 35000 from cell ATACTTCAGCGGACAT-1_4"
## [1] "Starting walk 40000 from cell GATGAGGGTTTAGAGA-1_4"
## [1] "Starting walk 45000 from cell TTGGTTTGTACCGTGC-1_3"
## [1] "Starting walk 50000 from cell CCACGTTTCTGCTTTA-1_4"
## [1] "2025-09-02 16:04:02.621747 - Starting random walks from tip 24"
## [1] "Starting walk 5000 from cell GCATCGGTCACTCGAA-1_1"
## [1] "Starting walk 10000 from cell GTGAGGAGTGAATTAG-1_2"
## [1] "Starting walk 15000 from cell GGAACCCTCCTTTAGT-1_4"
## [1] "Starting walk 20000 from cell AGCTACAAGTAGGTTA-1_1"
## [1] "Starting walk 25000 from cell ACCGTTCTCGCGCTGA-1_2"
## [1] "Starting walk 30000 from cell ATCCACCCATGCACTA-1_3"
## [1] "Starting walk 35000 from cell GCACATAAGCAGGTCA-1_3"
## [1] "Starting walk 40000 from cell ACTGTGAAGTATAACG-1_3"
## [1] "Starting walk 45000 from cell CGTGTCTTCACTGTCC-1_4"
## [1] "Starting walk 50000 from cell GTCGAATGTACCACGC-1_2"
## [1] "2025-09-02 16:13:10.657008 - Starting random walks from tip 25"
## [1] "Starting walk 5000 from cell GCATTAGCACGTCGGT-1_4"
## [1] "Starting walk 10000 from cell CATAGACTCGACATCA-1_4"
## [1] "Starting walk 15000 from cell TTTCGATTCAGTGTGT-1_4"
## [1] "Starting walk 20000 from cell CAGTTCCCATCCTAAG-1_4"
## [1] "Starting walk 25000 from cell GCATCTCTCAGCGTCG-1_4"
## [1] "Starting walk 30000 from cell AAGCCATGTACGGCAA-1_4"
## [1] "Starting walk 35000 from cell GGGATCCAGATTAGTG-1_3"
## [1] "Starting walk 40000 from cell GTCGTTCGTTAAGAAC-1_4"
## [1] "Starting walk 45000 from cell CTCAAGAAGCGTGAAC-1_4"
## [1] "Starting walk 50000 from cell GAGTTGTAGGGACAGG-1_4"
## [1] "2025-09-02 16:23:57.808939 - Starting random walks from tip 27"
## [1] "Starting walk 5000 from cell TGATTCTAGTGCGTCC-1_1"
## [1] "Starting walk 10000 from cell GCGGAAAAGCTTCTAG-1_1"
## [1] "Starting walk 15000 from cell TATTGCTTCCCAGTGG-1_2"
## [1] "Starting walk 20000 from cell CTTCAATGTAGACACG-1_1"
## [1] "Starting walk 25000 from cell CCCATTGAGCTGGAGT-1_2"
## [1] "Starting walk 30000 from cell GACAGCCTCCCGGTAG-1_3"
## [1] "Starting walk 35000 from cell TGATTTCAGTATGGCG-1_4"
## [1] "Starting walk 40000 from cell TTCCTCTTCCTCTAGC-1_2"
## [1] "Starting walk 45000 from cell CAGATTGGTTCGAGCC-1_4"
## [1] "Starting walk 50000 from cell TGATTTCAGTATGGCG-1_4"
## [1] "2025-09-02 16:38:11.413533 - Starting random walks from tip 29"
## [1] "Starting walk 5000 from cell CGAGGAACAAGACTGG-1_3"
## [1] "Starting walk 10000 from cell TGCGGGTTCATCAGTG-1_1"
## [1] "Starting walk 15000 from cell GGAATCTAGGATAATC-1_2"
## [1] "Starting walk 20000 from cell AACCTGATCCCGTAAA-1_3"
## [1] "Starting walk 25000 from cell ATACCTTAGGTCGACA-1_3"
## [1] "Starting walk 30000 from cell ATACCTTTCGTGACTA-1_3"
## [1] "Starting walk 35000 from cell ATACCTTTCGTGACTA-1_3"
## [1] "Starting walk 40000 from cell AACTTCTCACTCCGAG-1_1"
## [1] "Starting walk 45000 from cell CCTCCTCGTCTACGTA-1_1"
## [1] "Starting walk 50000 from cell TGAGACTAGGAGTCTG-1_1"
## [1] "2025-09-02 16:45:29.194119 - Starting random walks from tip 30"
## [1] "Starting walk 5000 from cell TGTGATGCAAAGGGCT-1_3"
## [1] "Starting walk 10000 from cell GTAGGTTGTTTGATCG-1_2"
## [1] "Starting walk 15000 from cell GATAGCTGTTCTATCT-1_1"
## [1] "Starting walk 20000 from cell TCACGCTTCGGAGCAA-1_1"
## [1] "Starting walk 25000 from cell CAGCACGTCTGGTGCG-1_4"
## [1] "Starting walk 30000 from cell ACATCGACACATACGT-1_3"
## [1] "Starting walk 35000 from cell TCAGTTTCAGCGACCT-1_4"
## [1] "Starting walk 40000 from cell TGAGCGCTCAAACCTG-1_1"
## [1] "Starting walk 45000 from cell TGATTCTGTAATACCC-1_2"
## [1] "Starting walk 50000 from cell AAACGCTCATCCGGTG-1_4"
## [1] "2025-09-02 16:54:19.919281 - Starting random walks from tip 36"
## [1] "Starting walk 5000 from cell ACCACAAGTGAGACCA-1_1"
## [1] "Starting walk 10000 from cell ATACCTTAGCAAATGT-1_2"
## [1] "Starting walk 15000 from cell AATCACGTCGTCAGAT-1_1"
## [1] "Starting walk 20000 from cell TTTACGTCATTACTCT-1_1"
## [1] "Starting walk 25000 from cell TACCCGTCAGCGACCT-1_2"
## [1] "Starting walk 30000 from cell CATGCGGAGATCCTAC-1_1"
## [1] "Starting walk 35000 from cell AAAGGGCCATAGAGGC-1_1"
## [1] "Starting walk 40000 from cell TTCACGCTCCGTGTAA-1_1"
## [1] "Starting walk 45000 from cell CTACGGGTCGACGATT-1_3"
## [1] "Starting walk 50000 from cell TGATCAGGTTCTAACG-1_3"
## [1] "2025-09-02 17:02:15.218194 - Starting random walks from tip 37"
## [1] "Starting walk 5000 from cell ACATCGATCTCCGAAA-1_3"
## [1] "Starting walk 10000 from cell TCCATCGAGGAGGTTC-1_3"
## [1] "Starting walk 15000 from cell TATCTTGAGTCTAGCT-1_4"
## [1] "Starting walk 20000 from cell CACAGATTCTGCTCTG-1_2"
## [1] "Starting walk 25000 from cell TCTATACCAGGTGTGA-1_4"
## [1] "Starting walk 30000 from cell AGGCCACCACCCTCTA-1_4"
## [1] "Starting walk 35000 from cell GTTCTATAGACGGAAA-1_4"
## [1] "Starting walk 40000 from cell CGTAAGTTCACTTTGT-1_4"
## [1] "Starting walk 45000 from cell ACTATTCCAGGGCTTC-1_4"
## [1] "Starting walk 50000 from cell TGTTCTACAGTCCCGA-1_4"
## [1] "2025-09-02 17:08:33.236963 - Starting random walks from tip 39"
## [1] "Starting walk 5000 from cell GACATCACACGGTGTC-1_4"
## [1] "Starting walk 10000 from cell CTTCCTTTCTTCTGTA-1_1"
## [1] "Starting walk 15000 from cell CTGAGCGTCAGGTAAA-1_4"
## [1] "Starting walk 20000 from cell TGGGCGTGTAGGCAAC-1_3"
## [1] "Starting walk 25000 from cell TATCAGGCAGGTCCGT-1_1"
## [1] "Starting walk 30000 from cell TATATCCAGTTCTACG-1_4"
## [1] "Starting walk 35000 from cell CGCCATTCAACCGGAA-1_4"
## [1] "Starting walk 40000 from cell GGAATGGAGAACTTCC-1_4"
## [1] "Starting walk 45000 from cell CTCAACCCAATCGAAA-1_3"
## [1] "Starting walk 50000 from cell GGGCGTTTCTGCGAGC-1_4"
## [1] "2025-09-02 17:15:42.477927 - Starting random walks from tip 4"
## [1] "Starting walk 5000 from cell TCGATTTGTATCACGT-1_2"
## [1] "Starting walk 10000 from cell CAGGCCAAGCGGCTCT-1_4"
## [1] "Starting walk 15000 from cell ATTCATCCAACTGCTA-1_4"
## [1] "Starting walk 20000 from cell TCACATTAGGCGCTCT-1_2"
## [1] "Starting walk 25000 from cell GTGTGGCGTGGCTACC-1_4"
## [1] "Starting walk 30000 from cell GTCGCGACAGCCTATA-1_1"
## [1] "Starting walk 35000 from cell GACCTTCAGCCGTCGT-1_1"
## [1] "Starting walk 40000 from cell TTCTCTCAGCTGGCCT-1_2"
## [1] "Starting walk 45000 from cell ACCACAATCGCGTGCA-1_1"
## [1] "Starting walk 50000 from cell GATCCCTCACCAAAGG-1_4"
## [1] "2025-09-02 17:22:11.661428 - Starting random walks from tip 40"
## [1] "Starting walk 5000 from cell TCCATGCTCGCATTGA-1_4"
## [1] "Starting walk 10000 from cell CAATGACAGAGCAGTC-1_4"
## [1] "Starting walk 15000 from cell ACACAGTCATAGTCAC-1_4"
## [1] "Starting walk 20000 from cell AAGACTCTCTCGCTCA-1_4"
## [1] "Starting walk 25000 from cell CCCTGATGTTGGCCTG-1_4"
## [1] "Starting walk 30000 from cell GTTGCTCAGTTGCTCA-1_3"
## [1] "Starting walk 35000 from cell AGGGCCTGTGCATTTG-1_3"
## [1] "Starting walk 40000 from cell ATCTTCACATGTGTCA-1_4"
## [1] "Starting walk 45000 from cell TCTCTGGTCAGAATAG-1_4"
## [1] "Starting walk 50000 from cell AATCGACTCCACACAA-1_4"
## [1] "2025-09-02 17:29:14.473423 - Starting random walks from tip 41"
## [1] "Starting walk 5000 from cell AGATCCATCTTCTAAC-1_1"
## [1] "Starting walk 10000 from cell ATGGATCTCTACTATC-1_2"
## [1] "Starting walk 15000 from cell GACTCTCAGTGTAGTA-1_1"
## [1] "Starting walk 20000 from cell ATGGATCTCTACTATC-1_2"
## [1] "Starting walk 25000 from cell GGGTCTGCAGTTGGTT-1_2"
## [1] "Starting walk 30000 from cell CTTCCGAAGAGAGGTA-1_4"
## [1] "Starting walk 35000 from cell GTTGTAGTCCGTGGTG-1_3"
## [1] "Starting walk 40000 from cell CCACAAAGTGGCGTAA-1_1"
## [1] "Starting walk 45000 from cell TTAGGGTTCAACCTTT-1_1"
## [1] "Starting walk 50000 from cell CATCAAGTCTCACTCG-1_2"
## [1] "2025-09-02 17:35:37.805151 - Starting random walks from tip 42"
## [1] "Starting walk 5000 from cell TAGGTACTCGCGTCGA-1_4"
## [1] "Starting walk 10000 from cell CTGAATGCACAATGAA-1_4"
## [1] "Starting walk 15000 from cell AGGACGACACGACGCT-1_2"
## [1] "Starting walk 20000 from cell TGAGACTTCTCTGAGA-1_4"
## [1] "Starting walk 25000 from cell TCCTAATTCTGCTCTG-1_4"
## [1] "Starting walk 30000 from cell AACAAAGTCGTAACTG-1_1"
## [1] "Starting walk 35000 from cell TTTCAGTGTGGAGAAA-1_4"
## [1] "Starting walk 40000 from cell TCCACGTCACTTTAGG-1_4"
## [1] "Starting walk 45000 from cell CATACCCGTGATGTAA-1_3"
## [1] "Starting walk 50000 from cell AGTACTGAGTATTGCC-1_2"
## [1] "2025-09-02 17:45:26.034505 - Starting random walks from tip 44"
## [1] "Starting walk 5000 from cell ACAAAGAGTACCGTCG-1_3"
## [1] "Starting walk 10000 from cell TACAGGTCATGAGATA-1_3"
## [1] "Starting walk 15000 from cell TCAGCCTCACTGGCCA-1_3"
## [1] "Starting walk 20000 from cell GTGTGATAGTTGCTGT-1_3"
## [1] "Starting walk 25000 from cell CATCCCATCCACGTCT-1_3"
## [1] "Starting walk 30000 from cell CCATAAGAGGCCCGTT-1_1"
## [1] "Starting walk 35000 from cell CGGGACTCACGTCATA-1_1"
## [1] "Starting walk 40000 from cell ACATCGAAGCAAGTCG-1_2"
## [1] "Starting walk 45000 from cell GGCGTCATCCGATCTC-1_1"
## [1] "Starting walk 50000 from cell AATCGTGAGCTGTCCG-1_3"
## [1] "2025-09-02 17:53:48.887762 - Starting random walks from tip 45"
## [1] "Starting walk 5000 from cell GTTTGGATCTATTTCG-1_3"
## [1] "Starting walk 10000 from cell CCTTCAGCAAATGCTC-1_2"
## [1] "Starting walk 15000 from cell TGGTACAGTACTCAAC-1_4"
## [1] "Starting walk 20000 from cell TGGAGGACATTCATCT-1_2"
## [1] "Starting walk 25000 from cell TGCATGAGTGAATATG-1_2"
## [1] "Starting walk 30000 from cell TGGTTAGGTTGTAGCT-1_3"
## [1] "Starting walk 35000 from cell TCAGTTTTCACATACG-1_3"
## [1] "Starting walk 40000 from cell GAGGGTACAAGCGCTC-1_3"
## [1] "Starting walk 45000 from cell TTCATTGAGTCGCTAT-1_3"
## [1] "Starting walk 50000 from cell TCGTGGGGTCGCTTAA-1_1"
## [1] "2025-09-02 18:09:36.825051 - Starting random walks from tip 46"
## [1] "Starting walk 5000 from cell GTGACGCCATCATTGG-1_4"
## [1] "Starting walk 10000 from cell TCAGCCTTCTCAATCT-1_4"
## [1] "Starting walk 15000 from cell GCCAGTGGTTGCATAC-1_1"
## [1] "Starting walk 20000 from cell GTAACACTCGTAGGGA-1_4"
## [1] "Starting walk 25000 from cell AGCGCCAGTCCTCCAT-1_4"
## [1] "Starting walk 30000 from cell CCTACGTAGATCGGTG-1_4"
## [1] "Starting walk 35000 from cell ATTACCTAGCCTCATA-1_2"
## [1] "Starting walk 40000 from cell GTCGTTCTCTCTATAC-1_4"
## [1] "Starting walk 45000 from cell TTACGTTGTCTAATCG-1_2"
## [1] "Starting walk 50000 from cell CATACTTTCAGCCCAG-1_3"
## [1] "2025-09-02 18:19:34.594507 - Starting random walks from tip 47"
## [1] "Starting walk 5000 from cell CAACAGTAGTCATGCT-1_4"
## [1] "Starting walk 10000 from cell GACATCATCAGAGCAG-1_2"
## [1] "Starting walk 15000 from cell TATCCTAGTTACACTG-1_4"
## [1] "Starting walk 20000 from cell TTTCATGCAGGTGTTT-1_4"
## [1] "Starting walk 25000 from cell CCTCCAAAGGATGAGA-1_2"
## [1] "Starting walk 30000 from cell AAATGGATCAAGTTGC-1_1"
## [1] "Starting walk 35000 from cell TCCTGCAGTCGTCTCT-1_3"
## [1] "Starting walk 40000 from cell TGCAGATGTGTCTAAC-1_2"
## [1] "Starting walk 45000 from cell TAGGAGGGTACTCGCG-1_4"
## [1] "Starting walk 50000 from cell TCGACCTAGGTCTTTG-1_2"
## [1] "2025-09-02 18:23:58.77152 - Starting random walks from tip 5"
## [1] "Starting walk 5000 from cell CGTGTCTTCTGTGCGG-1_4"
## [1] "Starting walk 10000 from cell TCATACTCATGAGATA-1_3"
## [1] "Starting walk 15000 from cell GTGTTCCTCGGAGTGA-1_1"
## [1] "Starting walk 20000 from cell ATGAAAGTCTTGGCTC-1_4"
## [1] "Starting walk 25000 from cell ATTCTACAGCGACTGA-1_4"
## [1] "Starting walk 30000 from cell AGGGTCCTCTATTCGT-1_4"
## [1] "Starting walk 35000 from cell TATCGCCTCTAGTTCT-1_3"
## [1] "Starting walk 40000 from cell GGAAGTGTCCTTCTGG-1_4"
## [1] "Starting walk 45000 from cell GTATTGGTCGGCCAAC-1_4"
## [1] "Starting walk 50000 from cell AATTCCTCAGAGTGAC-1_4"
## [1] "2025-09-02 18:33:51.226001 - Starting random walks from tip 6"
## [1] "Starting walk 5000 from cell TCGTAGATCTTCACGC-1_3"
## [1] "Starting walk 10000 from cell TGGGCTGCAGTCGGTC-1_4"
## [1] "Starting walk 15000 from cell TAATTCCAGGTAAAGG-1_4"
## [1] "Starting walk 20000 from cell CTGGACGAGAACTTCC-1_1"
## [1] "Starting walk 25000 from cell TTCAATCAGGAGCTGT-1_3"
## [1] "Starting walk 30000 from cell TAGACCAAGACCATGG-1_1"
## [1] "Starting walk 35000 from cell ATTACCTTCGTGGACC-1_3"
## [1] "Starting walk 40000 from cell CTATAGGCAGGATCTT-1_4"
## [1] "Starting walk 45000 from cell AGGTTGTTCAGGGTAG-1_4"
## [1] "Starting walk 50000 from cell CTGCAGGAGGCGTTGA-1_4"
## [1] "2025-09-02 18:41:33.375147 - Starting random walks from tip 7"
## [1] "Starting walk 5000 from cell CAAGCTAAGAGTCGAC-1_2"
## [1] "Starting walk 10000 from cell GATCAGTGTGGCAACA-1_1"
## [1] "Starting walk 15000 from cell GAGGGATGTTTCACTT-1_1"
## [1] "Starting walk 20000 from cell CCTTTGGTCGGTCATA-1_4"
## [1] "Starting walk 25000 from cell TGAGGTTCAGCAAGAC-1_4"
## [1] "Starting walk 30000 from cell CCACAAAGTTATAGAG-1_2"
## [1] "Starting walk 35000 from cell GGGAGATAGGCACAAC-1_4"
## [1] "Starting walk 40000 from cell AGAGCAGTCCTGCCAT-1_2"
## [1] "Starting walk 45000 from cell TGGGAAGTCGGTTCAA-1_1"
## [1] "Starting walk 50000 from cell GTGGGAATCGTAGTCA-1_1"
## [1] "2025-09-02 18:51:37.700652 - Starting random walks from tip 8"
## [1] "Starting walk 5000 from cell ACCCTCAGTTATAGAG-1_2"
## [1] "Starting walk 10000 from cell CAAGCTAGTCAGATTC-1_1"
## [1] "Starting walk 15000 from cell AGCTACACAATTTCTC-1_2"
## [1] "Starting walk 20000 from cell AATCACGCAATGACCT-1_2"
## [1] "Starting walk 25000 from cell TGCGATAGTACGGATG-1_1"
## [1] "Starting walk 30000 from cell CATTGAGAGCTGCGAA-1_2"
## [1] "Starting walk 35000 from cell TGCGATAGTACGGATG-1_1"
## [1] "Starting walk 40000 from cell TCATACTCATGACAGG-1_1"
## [1] "Starting walk 45000 from cell AGCATCACACTCAGAT-1_2"
## [1] "Starting walk 50000 from cell CAAGACTCAAGGAGTC-1_2"
## [1] "2025-09-02 18:58:52.303596 - Starting random walks from tip 9"
## [1] "Starting walk 5000 from cell TCAGGTACACCAAAGG-1_1"
## [1] "Starting walk 10000 from cell GCCATGGTCCCGAACG-1_2"
## [1] "Starting walk 15000 from cell GGTAACTAGACAGCTG-1_4"
## [1] "Starting walk 20000 from cell GGCACGTGTGGCCTCA-1_2"
## [1] "Starting walk 25000 from cell GAATAGAGTCAGTCTA-1_4"
## [1] "Starting walk 30000 from cell ATAGAGATCGAATGCT-1_2"
## [1] "Starting walk 35000 from cell GTCGTTCAGAGTGAAG-1_3"
## [1] "Starting walk 40000 from cell GATGGAGGTGCAACGA-1_2"
## [1] "Starting walk 45000 from cell TATTTCGGTATCATGC-1_2"
## [1] "Starting walk 50000 from cell GATGTTGGTTCCGTTC-1_1"
# Process the biased random walks into visitation frequencies
urd.obj <- processRandomWalksFromTips(urd.obj, walks, verbose = T)
## [1] "2025-09-02 19:03:52.008122 - Processing walks from tip 1"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:04:05.9 - Processing walks from tip 10"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:04:17.586497 - Processing walks from tip 11"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:04:31.852322 - Processing walks from tip 12"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:04:42.24823 - Processing walks from tip 13"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:04:56.651984 - Processing walks from tip 14"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:05:08.468842 - Processing walks from tip 16"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:05:20.124682 - Processing walks from tip 18"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:05:32.402157 - Processing walks from tip 2"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:05:45.514939 - Processing walks from tip 20"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:06:01.202238 - Processing walks from tip 22"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:06:14.35276 - Processing walks from tip 23"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:06:26.148721 - Processing walks from tip 24"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:06:37.817956 - Processing walks from tip 25"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:06:51.152977 - Processing walks from tip 27"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:07:05.30776 - Processing walks from tip 29"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:07:16.756179 - Processing walks from tip 30"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:07:29.465666 - Processing walks from tip 36"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:07:42.663768 - Processing walks from tip 37"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:07:55.078799 - Processing walks from tip 39"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:08:08.058252 - Processing walks from tip 4"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:08:22.792809 - Processing walks from tip 40"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:08:36.239008 - Processing walks from tip 41"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:08:48.791423 - Processing walks from tip 42"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:09:03.496557 - Processing walks from tip 44"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:09:18.033966 - Processing walks from tip 45"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:09:35.28674 - Processing walks from tip 46"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:09:51.103032 - Processing walks from tip 47"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:10:02.267667 - Processing walks from tip 5"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:10:18.353671 - Processing walks from tip 6"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:10:32.513936 - Processing walks from tip 7"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:10:45.662781 - Processing walks from tip 8"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:10:57.828556 - Processing walks from tip 9"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."

We can plot visitation frequencies from each sub cluster to determine which sub-clusters we want to use/combine for each neuron cell type. Here we wrote a function to plot all visitation frequencies from each subcluster of a neural type.

# make a function to create a plot for each curatedIdent showing the visitation
# frequencies for every subcluster in that ident
plot.multiple.visitation.freqs <- function(curatedIdent) {
    # make plotlist:
    tip.clusters.plot <- tip.sub.clusters[[curatedIdent]]
    plot.list <- lapply(tip.clusters.plot, function(x) {
        # get the visitation column to plot from urd.obj@diff.data
        visitation.column <- paste0("visitfreq.log.", x)
        # make plot title:
        title <- paste0(curatedIdent, ": Log10 Visitation: tip ", x)
        # make plot:
        plot <- plotDim(urd.obj, visitation.column, plot.title = title)
        return(plot)
    })

    ncol <- ceiling(sqrt(length(plot.list)))

    plot <- cowplot::plot_grid(plotlist = plot.list, ncol = ncol)

    return(plot)
}


plot.multiple.visitation.freqs("ec3C")

Build the tree

Combine tip walks

To build the tree, we will combine tips and random walks from the sub-clusters. Because buildTree() is currently destructive, we will make a new object.

tree <- loadTipCells(urd.obj, "tip.clusters")
tree <- processRandomWalksFromTips(tree, walks, verbose = T)
## [1] "2025-09-02 19:11:11.130881 - Processing walks from tip 1"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:11:25.025338 - Processing walks from tip 10"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:11:37.312374 - Processing walks from tip 11"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:11:50.670577 - Processing walks from tip 12"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:12:01.595344 - Processing walks from tip 13"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:12:16.329228 - Processing walks from tip 14"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:12:29.023101 - Processing walks from tip 16"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:12:41.44749 - Processing walks from tip 18"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:12:53.818178 - Processing walks from tip 2"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:13:07.016337 - Processing walks from tip 20"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:13:23.034797 - Processing walks from tip 22"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:13:36.820716 - Processing walks from tip 23"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:13:49.113087 - Processing walks from tip 24"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:14:01.212611 - Processing walks from tip 25"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:14:15.095592 - Processing walks from tip 27"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:14:30.349222 - Processing walks from tip 29"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:14:42.366758 - Processing walks from tip 30"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:14:55.455317 - Processing walks from tip 36"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:15:08.917256 - Processing walks from tip 37"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:15:22.028085 - Processing walks from tip 39"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:15:35.950376 - Processing walks from tip 4"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:15:50.837746 - Processing walks from tip 40"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:16:04.560569 - Processing walks from tip 41"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:16:18.522544 - Processing walks from tip 42"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:16:31.977513 - Processing walks from tip 44"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:16:45.041763 - Processing walks from tip 45"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:17:00.26175 - Processing walks from tip 46"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:17:14.141077 - Processing walks from tip 47"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:17:24.021684 - Processing walks from tip 5"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:17:38.700466 - Processing walks from tip 6"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:17:51.40933 - Processing walks from tip 7"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:18:05.172678 - Processing walks from tip 8"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."
## [1] "2025-09-02 19:18:18.344238 - Processing walks from tip 9"
## [1] "Calculating pseudotime with 5000 walks."
## [1] "Calculating pseudotime with 10000 walks."
## [1] "Calculating pseudotime with 15000 walks."
## [1] "Calculating pseudotime with 20000 walks."
## [1] "Calculating pseudotime with 25000 walks."
## [1] "Calculating pseudotime with 30000 walks."
## [1] "Calculating pseudotime with 35000 walks."
## [1] "Calculating pseudotime with 40000 walks."
## [1] "Calculating pseudotime with 45000 walks."
## [1] "Calculating pseudotime with 50000 walks."

The function combineTipVisitation() will combine random walks from several tips. However, it only combines two walks, and several cell types have multiple tip clusters that we will use. We therefore write a for loop to run combineTipVisitation() iteratively on all sub-clusters being used as tips in each cell type.

# Make a list of tips subclusters to use for each curatedIdent:
tips.use <- list(ec4A = c("13", "5", "42"), ec2 = c("1", "44"), ec1B = c("25", "11"),
    ec1E = "45", ec1D = "14", ec5 = c("9", "16"), en3 = c("39", "6", "40", "37"),
    en1 = c("12", "24"), en2 = c("10", "30", "23", "18"), ec3A = c("29", "41"), ec3B = "8",
    ec3C = c("27", "7"))



# make a function to combine tips for each cluster: using
# combineTipVisitation() from URD, but it only combines 2 tips at a time, so
# for clusters where we are combining more than 2 tips, it will run a for loop
# to iteratively combine each tip.
combine.multiple.tips <- function(tree.obj, curatedIdent) {
    tips.combine <- tips.use[[curatedIdent]]
    n.tips <- length(tips.combine)
    new.tip.name <- tips.combine[1]
    if (n.tips == 1)
        return(tree.obj) else {
        for (i in 2:n.tips) {
            tree.obj <- combineTipVisitation(tree.obj, new.tip.name, tips.combine[i],
                new.tip = new.tip.name)
        }
        return(tree.obj)
    }
}

# Run it on each cluster
for (x in names(tips.use)) {
    tree <- combine.multiple.tips(tree.obj = tree, curatedIdent = x)
}

# get tip names of combined tips:
tip.names <- sapply(names(tips.use), function(x) return(tips.use[[x]][1]))

# make a new plot showing tip cells used:
tree@group.ids$final.tips <- NA
for (t in tip.names) {
    tree@group.ids[tree@tree$cells.in.tip[[t]], "final.tips"] <- t
}

# plot final tip cells:
plotDim(tree, "final.tips", label.clusters = T, legend = F, plot.title = "Tip Clusters combined to use in Tree")

Build the tree

Here, we use the combined random walks to build the final trajectory tree.

tips.build <- tip.names[c("ec4A", "ec2", "ec5", "en3", "en1", "en2", "ec3A", "ec3B",
    "ec3C", "ec1E", "ec1D", "ec1B")]

tree <- buildTree(tree, pseudotime = "pseudotime", divergence.method = "preference",
    save.all.breakpoint.info = T, tips.use = tips.build, cells.per.pseudotime.bin = 25,
    bins.per.pseudotime.window = 5, p.thresh = 1e-06, min.cells.per.segment = 10,
    save.breakpoint.plots = NULL, verbose = T)
## [1] "Calculating divergence between 13 and 1 (Pseudotime 0 to 0.586)"
## [1] "Calculating divergence between 13 and 9 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 13 and 39 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 13 and 12 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 13 and 10 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 13 and 29 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 13 and 8 (Pseudotime 0 to 0.522)"
## [1] "Calculating divergence between 13 and 27 (Pseudotime 0 to 0.602)"
## [1] "Calculating divergence between 13 and 45 (Pseudotime 0 to 0.602)"
## [1] "Calculating divergence between 13 and 14 (Pseudotime 0 to 0.516)"
## [1] "Calculating divergence between 13 and 25 (Pseudotime 0 to 0.548)"
## [1] "Calculating divergence between 1 and 9 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 1 and 39 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 1 and 12 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 1 and 10 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 1 and 29 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 1 and 8 (Pseudotime 0 to 0.522)"
## [1] "Calculating divergence between 1 and 27 (Pseudotime 0 to 0.586)"
## [1] "Calculating divergence between 1 and 45 (Pseudotime 0 to 0.586)"
## [1] "Calculating divergence between 1 and 14 (Pseudotime 0 to 0.516)"
## [1] "Calculating divergence between 1 and 25 (Pseudotime 0 to 0.548)"
## [1] "Calculating divergence between 9 and 39 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 12 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 10 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 29 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 8 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 27 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 45 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 14 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 9 and 25 (Pseudotime 0 to 0.457)"
## [1] "Calculating divergence between 39 and 12 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 39 and 10 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 39 and 29 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 39 and 8 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 39 and 27 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 39 and 45 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 39 and 14 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 39 and 25 (Pseudotime 0 to 0.493)"
## [1] "Calculating divergence between 12 and 10 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 12 and 29 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 12 and 8 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 12 and 27 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 12 and 45 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 12 and 14 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 12 and 25 (Pseudotime 0 to 0.468)"
## [1] "Calculating divergence between 10 and 29 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 10 and 8 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 10 and 27 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 10 and 45 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 10 and 14 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 10 and 25 (Pseudotime 0 to 0.496)"
## [1] "Calculating divergence between 29 and 8 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 29 and 27 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 29 and 45 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 29 and 14 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 29 and 25 (Pseudotime 0 to 0.46)"
## [1] "Calculating divergence between 8 and 27 (Pseudotime 0 to 0.522)"
## [1] "Calculating divergence between 8 and 45 (Pseudotime 0 to 0.522)"
## [1] "Calculating divergence between 8 and 14 (Pseudotime 0 to 0.516)"
## [1] "Calculating divergence between 8 and 25 (Pseudotime 0 to 0.522)"
## [1] "Calculating divergence between 27 and 45 (Pseudotime 0 to 0.627)"
## [1] "Calculating divergence between 27 and 14 (Pseudotime 0 to 0.516)"
## [1] "Calculating divergence between 27 and 25 (Pseudotime 0 to 0.548)"
## [1] "Calculating divergence between 45 and 14 (Pseudotime 0 to 0.516)"
## [1] "Calculating divergence between 45 and 25 (Pseudotime 0 to 0.548)"
## [1] "Calculating divergence between 14 and 25 (Pseudotime 0 to 0.516)"
## [1] "Joining segments 8 and 27 at pseudotime 0.368 to create segment 46"
## [1] "Calculating divergence between 13 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 1 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 9 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 39 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 12 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 10 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 29 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 45 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 14 and 46 (Pseudotime 0 to 0.368)"
## [1] "Calculating divergence between 25 and 46 (Pseudotime 0 to 0.368)"
## [1] "Joining segments 45 and 25 at pseudotime 0.326 to create segment 47"
## [1] "Calculating divergence between 13 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 1 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 9 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 39 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 12 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 10 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 29 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 14 and 47 (Pseudotime 0 to 0.326)"
## [1] "Calculating divergence between 46 and 47 (Pseudotime 0 to 0.326)"
## [1] "Joining segments 29 and 46 at pseudotime 0.32 to create segment 48"
## [1] "Calculating divergence between 13 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 1 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 9 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 39 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 12 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 10 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 14 and 48 (Pseudotime 0 to 0.32)"
## [1] "Calculating divergence between 47 and 48 (Pseudotime 0 to 0.32)"
## [1] "Joining segments 14 and 47 at pseudotime 0.286 to create segment 49"
## [1] "Calculating divergence between 13 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 1 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 9 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 39 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 12 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 10 and 49 (Pseudotime 0 to 0.286)"
## [1] "Calculating divergence between 48 and 49 (Pseudotime 0 to 0.286)"
## [1] "Joining segments 39 and 12 at pseudotime 0.282 to create segment 50"
## [1] "Calculating divergence between 13 and 50 (Pseudotime 0 to 0.282)"
## [1] "Calculating divergence between 1 and 50 (Pseudotime 0 to 0.282)"
## [1] "Calculating divergence between 9 and 50 (Pseudotime 0 to 0.282)"
## [1] "Calculating divergence between 10 and 50 (Pseudotime 0 to 0.282)"
## [1] "Calculating divergence between 48 and 50 (Pseudotime 0 to 0.282)"
## [1] "Calculating divergence between 49 and 50 (Pseudotime 0 to 0.282)"
## [1] "Joining segments 13 and 1 at pseudotime 0.257 to create segment 51"
## [1] "Calculating divergence between 9 and 51 (Pseudotime 0 to 0.257)"
## [1] "Calculating divergence between 10 and 51 (Pseudotime 0 to 0.257)"
## [1] "Calculating divergence between 48 and 51 (Pseudotime 0 to 0.257)"
## [1] "Calculating divergence between 49 and 51 (Pseudotime 0 to 0.257)"
## [1] "Calculating divergence between 50 and 51 (Pseudotime 0 to 0.257)"
## [1] "Joining segments 49 and 51 at pseudotime 0.245 to create segment 52"
## [1] "Calculating divergence between 9 and 52 (Pseudotime 0 to 0.245)"
## [1] "Calculating divergence between 10 and 52 (Pseudotime 0 to 0.245)"
## [1] "Calculating divergence between 48 and 52 (Pseudotime 0 to 0.245)"
## [1] "Calculating divergence between 50 and 52 (Pseudotime 0 to 0.245)"
## [1] "Joining segments 48 and 52 at pseudotime 0.227 to create segment 53"
## [1] "Calculating divergence between 9 and 53 (Pseudotime 0 to 0.227)"
## [1] "Calculating divergence between 10 and 53 (Pseudotime 0 to 0.227)"
## [1] "Calculating divergence between 50 and 53 (Pseudotime 0 to 0.227)"
## [1] "Joining segments 10 and 50 at pseudotime 0.13 to create segment 54"
## [1] "Calculating divergence between 9 and 54 (Pseudotime 0 to 0.13)"
## [1] "Calculating divergence between 53 and 54 (Pseudotime 0 to 0.13)"
## [1] "Joining segments 53 and 54 at pseudotime 0.11 to create segment 55"
## [1] "Calculating divergence between 9 and 55 (Pseudotime 0 to 0.11)"
## [1] "Joining segments 9 and 55 at pseudotime 0.109 to create segment 56"
## [1] "Assigning cells to segments."
## [1] "Collapsing short segments."
## [1] "Removing singleton segments."
## [1] "Reassigning cells to segments."
## [1] "Assigning cells to nodes."
## [1] "Laying out tree."
## [1] "Adding cells to tree."
tree <- nameSegments(tree, segments = tips.build, segment.names = names(tips.build),
    short.names = names(tips.build))

plotTree(tree, label = "curatedIdent", discrete.colors = curated.ident.colors)

Now that the tree has been built, we will replace the Seurat batch corrected data with the SCT normalized data.

# reload seurat object:
seurat.obj <- readRDS("/data/CSD/Michael/hydra/objects/hydra.neuron.seurat_updated.HML.RDS")
# add SCT data:
tree@logupx.data <- seurat.obj@assays$SCT$data[, colnames(tree@logupx.data)]

Plotting

Gene expression

We can plot gene gene expression along the tree:

plotTree(tree, "G022640", title = "gata3 (G022640)") | plotTree(tree, "G004115",
    title = "hym355 (G004115)")

To plot dual gene expression along the tree as can be seen in the paper, we use a function that will be implemented in future versions of URD:

# Dual color plot function:

# This Function takes a dataframe with two genes scaled from 0-1, two color
# hues, and finds the hue that represents the average of their expression /
# cell.
dual.color.transformation.greymix <- function(x, color.1, color.2, base.grey = 0.8) {
    x$r <- scales::rescale(sqrt(x$gene.1.scaled^2 + x$gene.2.scaled^2), to = c(0,
        1))
    x$theta <- atan2(x$gene.2.scaled, x$gene.1.scaled)
    x$hue <- (x$theta/(pi/2) * (color.2 - color.1)) + color.1
    x[x$hue >= 360, "hue"] <- x[x$hue >= 360, "hue"] - 360
    x.bold.color <- as(colorspace::HSV(S = 1, H = x$hue, V = 1), "RGB")
    x <- cbind(x, as(colorspace::HSV(S = 1, H = x$hue, V = 1), "RGB")@coords)
    x$R.use <- (x$r * x$R) + ((1 - x$r) * base.grey)
    x$G.use <- (x$r * x$G) + ((1 - x$r) * base.grey)
    x$B.use <- (x$r * x$B) + ((1 - x$r) * base.grey)
    x$color <- rgb(red = x$R.use, green = x$G.use, blue = x$B.use, alpha = 1)
    return(x)
}

# This function is building off of plotTreeDual() but using
# dual.color.transformation.greymix() to plot the two genes instead of just
# using rgb values.
plotTreeDual.recolored <- function(object, label.1, label.2, label.1.legend = label.1,
    label.2.legend = label.2, label.type.1 = "search", label.type.2 = "search", color.1 = 240,
    color.2 = 390, title = NULL, legend = T, legend.title = "", plot.tree = T, tree.alpha = 1,
    tree.size = 1, plot.cells = T, cell.alpha = 0.25, cell.size = 0.5, label.x = T,
    label.segments = F, color.tree = T, color.limits.1 = NULL, color.limits.2 = NULL,
    hide.y.ticks = T, legend.size = 1/5.5, legend.offset.x = 0, legend.offset.y = 0,
    legend.position = "UR") {

    # Validation of parameters
    if (class(object) != "URD")
        stop("Must provide an URD object as input to plotTree.")
    if (length(object@tree) == 0)
        stop("A tree has not been calculated for this URD object. buildTree must be run first.")

    # Grab various layouts from the object
    segment.layout <- object@tree$segment.layout
    tree.layout <- object@tree$tree.layout
    if (plot.cells)
        cell.layout <- object@tree$cell.layout

    # Create title if needed
    if (is.null(title))
        title <- paste(label.1.legend, " vs.", label.2.legend)

    # Initialize ggplot and do basic formatting
    the.plot <- ggplot()
    if (hide.y.ticks) {
        the.plot <- the.plot + scale_y_reverse(c(1, 0), name = "Pseudotime", breaks = NULL)
    } else {
        the.plot <- the.plot + scale_y_reverse(c(1, 0), name = "Pseudotime", breaks = seq(0,
            1, 0.1))
    }
    the.plot <- the.plot + theme_bw() + theme(axis.ticks = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
    the.plot <- the.plot + labs(x = "", title = title, color = legend.title)

    # Grab data to color by
    label.1.data <- data.for.plot(object, label = label.1, label.type = label.type.1,
        as.color = F, as.discrete.list = T, cells.use = rownames(object@diff.data))
    label.2.data <- data.for.plot(object, label = label.2, label.type = label.type.2,
        as.color = F, as.discrete.list = T, cells.use = rownames(object@diff.data))
    if (label.1.data$discrete || label.2.data$discrete)
        stop("Cannot use discrete labels in dual-color plots.")
    color.data <- data.frame(cell = rownames(object@diff.data), gene.1 = label.1.data$data,
        gene.2 = label.2.data$data, node = object@diff.data[, "node"], stringsAsFactors = F)

    # Get the color values: Build out the color values for these data
    label.1.max <- quantile(color.data$gene.1[color.data$gene.1 > 0], prob = 0.975,
        na.rm = T)
    label.2.max <- quantile(color.data$gene.2[color.data$gene.2 > 0], prob = 0.975,
        na.rm = T)
    if (is.na(label.1.max))
        label.1.max <- 1
    if (is.na(label.2.max))
        label.2.max <- 1
    color.data$gene.1.scaled <- scales::squish(scales::rescale(color.data$gene.1,
        from = c(0, label.1.max)), c(0, 1))
    color.data$gene.2.scaled <- scales::squish(scales::rescale(color.data$gene.2,
        from = c(0, label.2.max)), c(0, 1))

    # get colors for cells:
    color.data <- dual.color.transformation.greymix(x = color.data, color.1 = color.1,
        color.2 = color.2)

    # get the color for nodes:
    if (plot.tree) {
        # Mean expression per node
        node.data <- aggregate(color.data[, c("gene.1", "gene.2")], by = list(color.data$node),
            FUN = mean.of.logs)
        rownames(node.data) <- node.data$Group.1
        node.data$n <- unlist(lapply(object@tree$cells.in.nodes, length))[node.data$Group.1]

        # Color segments according to their expression of their end node
        # (Replace -0 nodes with -1 for getting expression data.)
        tree.layout$node.1 <- gsub("-0", "-1", tree.layout$node.1)
        tree.layout$node.2 <- gsub("-0", "-1", tree.layout$node.2)
        tree.layout[, "gene.1"] <- node.data[tree.layout$node.2, "gene.1"]
        tree.layout[, "gene.2"] <- node.data[tree.layout$node.2, "gene.2"]

        # scale the data:
        tree.layout$gene.1.scaled <- scales::squish(scales::rescale(tree.layout$gene.1,
            from = c(0, label.1.max)), c(0, 1))
        tree.layout$gene.2.scaled <- scales::squish(scales::rescale(tree.layout$gene.2,
            from = c(0, label.2.max)), c(0, 1))

        # merge the colors:
        tree.layout <- dual.color.transformation.greymix(x = tree.layout, color.1 = color.1,
            color.2 = color.2)
    }

    # Add cells to graph
    if (plot.cells) {
        cell.layout$color <- color.data[cell.layout$cell, "color"]
        the.plot <- the.plot + geom_point(data = cell.layout, aes(x = x, y = y),
            color = cell.layout$color, alpha = cell.alpha, size = cell.size)
    }

    # Add tree to graph
    if (plot.tree) {
        if (color.tree) {
            # With color, if desired
            the.plot <- the.plot + geom_segment(data = tree.layout, aes(x = x1, y = y1,
                xend = x2, yend = y2), color = tree.layout$color, alpha = tree.alpha,
                size = tree.size, lineend = "square")
        } else {
            # Just plain black if no label
            the.plot <- the.plot + geom_segment(data = tree.layout, aes(x = x1, y = y1,
                xend = x2, yend = y2), color = "#333333", alpha = tree.alpha, size = tree.size,
                lineend = "square")
        }
    }

    # add legend
    if (legend) {
        if (plot.cells) {
            legend.data <- cell.layout
        } else if (plot.tree) {
            legend.data <- tree.layout
        }
        # Make legend:
        if (legend.position %in% c("UR", "LR")) {
            leg.x.max <- max(legend.data[, "x"]) + legend.offset.x
            leg.x.min <- leg.x.max - (diff(range(legend.data[, "x"])) * legend.size) +
                legend.offset.x
        } else {
            leg.x.min <- min(legend.data[, "x"]) - legend.offset.x
            leg.x.max <- leg.x.min + (diff(range(legend.data[, "x"])) * legend.size) -
                legend.offset.x
        }
        if (legend.position %in% c("LR", "LL")) {
            leg.y.min <- max(legend.data[, "y"]) - legend.offset.y
            leg.y.max <- leg.y.min - (diff(range(legend.data[, "y"])) * legend.size) +
                legend.offset.y
        } else {
            leg.y.max <- min(legend.data[, "y"]) + legend.offset.y
            leg.y.min <- leg.y.max + (diff(range(legend.data[, "y"])) * legend.size) -
                legend.offset.y
        }

        # Fill in the rest of the legend.
        leg.gene.1 <- round(seq(0, label.1.max, length.out = 6), digits = 2)
        leg.gene.2 <- round(seq(0, label.2.max, length.out = 6), digits = 2)
        leg.gene.scaled <- seq(0, 1, length.out = 6)
        leg.x.breaks <- seq(leg.x.min, leg.x.max, length.out = 10)
        leg.y.breaks <- seq(leg.y.min, leg.y.max, length.out = 10)
        legend.squares <- data.frame(stringsAsFactors = F, gene.1 = rep(leg.gene.1,
            each = 6), gene.2 = rep(leg.gene.2, 6), gene.1.scaled = rep(leg.gene.scaled,
            each = 6), gene.2.scaled = rep(leg.gene.scaled, 6), x.1 = rep(leg.x.breaks[2:7],
            each = 6), x.2 = rep(leg.x.breaks[3:8], each = 6), y.1 = rep(leg.y.breaks[2:7],
            6), y.2 = rep(leg.y.breaks[3:8], 6))
        legend.squares <- dual.color.transformation.greymix(x = legend.squares, color.1 = color.1,
            color.2 = color.2)
        side.labels <- data.frame(stringsAsFactors = F, label.x = c(leg.x.breaks[2:7],
            rep(leg.x.breaks[8], 6)), label.y = c(rep(leg.y.breaks[8], 6), leg.y.breaks[2:7]),
            label.text = as.character(c(leg.gene.1, leg.gene.2)))
        side.labels$label.x <- side.labels$label.x + (0.5 * (leg.x.breaks[2] - leg.x.breaks[1]))
        side.labels$label.y <- side.labels$label.y + (0.5 * (leg.y.breaks[2] - leg.y.breaks[1]))
        side.labels$angle <- c(rep(90, 6), rep(0, 6))
        side.labels$hjust <- 0
        side.labels$vjust <- 0.5
        side.labels$size <- 2.5
        side.labels <- rbind(side.labels, c(leg.x.breaks[5], leg.y.breaks[1], 0,
            0, 0.5, 0.5, 4))
        side.labels <- rbind(side.labels, c(leg.x.breaks[1], leg.y.breaks[5], 0,
            90, 0.5, 0.5, 4))
        side.labels[13:14, "label.text"] <- c(label.1.legend, label.2.legend)
        side.labels$color <- c(rep("black", 12), legend.squares[31, "color"], legend.squares[6,
            "color"])

        the.plot <- the.plot + geom_rect(legend.squares, mapping = aes(xmin = x.1,
            xmax = x.2, ymin = y.1, max = y.2), fill = legend.squares$color)
        the.plot <- the.plot + geom_text(data = side.labels, mapping = aes(label = label.text,
            x = label.x, y = label.y), angle = side.labels$angle, hjust = side.labels$hjust,
            vjust = side.labels$vjust, size = side.labels$size, color = side.labels$color)
    }

    # Add color
    the.plot <- the.plot + scale_color_identity()

    # Label segment names along the x-axis?
    if (label.x) {
        if ("segment.names" %in% names(object@tree)) {
            # Add segment names to segment.layout
            segment.layout$name <- object@tree$segment.names[segment.layout$segment]
            tip.layout <- segment.layout[complete.cases(segment.layout), ]
        } else {
            # Find terminal tips
            tip.layout <- segment.layout[which(segment.layout$segment %in% object@tree$tips),
                ]
            tip.layout$name <- as.character(tip.layout$segment)
        }
        the.plot <- the.plot + scale_x_continuous(breaks = as.numeric(tip.layout$x),
            labels = as.character(tip.layout$name))
        if (any(unlist(lapply(tip.layout$name, nchar)) > 2)) {
            the.plot <- the.plot + theme(axis.text.x = element_text(angle = 68, vjust = 1,
                hjust = 1))
        }
    } else {
        the.plot <- the.plot + theme(axis.text.x = element_blank())
    }

    # Label the segments with their number?
    if (label.segments) {
        segment.labels <- as.data.frame(segment.layout[, c("segment", "x")])
        segment.labels$y <- apply(object@tree$segment.pseudotime.limits, 1, num.mean)[segment.labels$segment]
        the.plot <- the.plot + geom_label(data = segment.labels, aes(x = x, y = y,
            label = segment), alpha = 0.5)
    }

    return(the.plot)
}
# plot dual expression of gata3 and hym355:
plotTreeDual.recolored(object = tree, label.1 = "G022640", label.2 = "G004115", label.1.legend = "gata3",
    label.2.legend = "hym355", cell.size = 0.75, cell.alpha = 0.5, legend.position = "LR")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Spline plots

We can also plot gene expression from root cells to a specified tip. To do this you will need to identify the segments along the tree from the root to the tip.

# label tree by segments
plotTree(tree, "segment", label.segments = T)

genes.plot <- c(myb = "G020130", bhlha15 = "G021353", gata3 = "G022640", hym355 = "G004115",
    ec3A.marker = "G021930")

# calculate average gene expression across a moving window:
progen.to.ec3A.spline <- geneSmoothFit(tree, method = "spline", pseudotime = "pseudotime",
    cells = cellsInCluster(tree, "segment", c("56", "53", "48", "29")), genes = genes.plot,
    moving.window = 1, cells.per.window = 5, spar = 0.875)
## [1] "2025-09-02 19:20:31.714: Calculating moving window expression."
## [1] "2025-09-02 19:20:38.968937: Generating un-scaled fits."
## [1] "2025-09-02 19:20:38.988407: Generating scaled fits."
## [1] "2025-09-02 19:20:39.006933: Reducing mean expression data to same dimensions as spline fits."
plotSmoothFit(progen.to.ec3A.spline, genes = genes.plot, scaled = T, multiplot = F,
    alpha.data = 0.2, plot.title = NULL) + scale_color_manual(values = c(G020130 = "forestgreen",
    G021353 = "cornflowerblue", G022640 = "#BC4643", G004115 = "darkorchid", G021930 = "goldenrod3"))